1 /******************************************************************************
3 Copyright (c) 2006-2013, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 $FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $
30 ***************************************************************************/
32 #include "opt_ifpoll.h"
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/linker.h>
38 #include <sys/firmware.h>
39 #include <sys/endian.h>
40 #include <sys/in_cksum.h>
41 #include <sys/sockio.h>
43 #include <sys/malloc.h>
44 #include <sys/kernel.h>
45 #include <sys/module.h>
46 #include <sys/serialize.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
51 #include <net/if_arp.h>
52 #include <net/ifq_var.h>
53 #include <net/ethernet.h>
54 #include <net/if_dl.h>
55 #include <net/if_media.h>
56 #include <net/if_poll.h>
60 #include <net/if_types.h>
61 #include <net/vlan/if_vlan_var.h>
63 #include <net/toeplitz.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/in.h>
67 #include <netinet/ip.h>
68 #include <netinet/tcp.h>
73 #include <bus/pci/pcireg.h>
74 #include <bus/pci/pcivar.h>
75 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */
77 #include <vm/vm.h> /* for pmap_mapdev() */
80 #if defined(__x86_64__)
81 #include <machine/specialreg.h>
84 #include <dev/netif/mxge/mxge_mcp.h>
85 #include <dev/netif/mxge/mcp_gen_header.h>
86 #include <dev/netif/mxge/if_mxge_var.h>
88 #define MXGE_IFM (IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE)
90 #define MXGE_RX_SMALL_BUFLEN (MHLEN - MXGEFW_PAD)
91 #define MXGE_HWRSS_KEYLEN 16
94 static int mxge_nvidia_ecrc_enable
= 1;
95 static int mxge_force_firmware
= 0;
96 static int mxge_intr_coal_delay
= MXGE_INTR_COAL_DELAY
;
97 static int mxge_deassert_wait
= 1;
98 static int mxge_ticks
;
99 static int mxge_num_slices
= 0;
100 static int mxge_always_promisc
= 0;
101 static int mxge_throttle
= 0;
102 static int mxge_msi_enable
= 1;
103 static int mxge_msix_enable
= 1;
104 static int mxge_multi_tx
= 1;
106 * Don't use RSS by default, its just too slow
108 static int mxge_use_rss
= 0;
110 static char mxge_flowctrl
[IFM_ETH_FC_STRLEN
] = IFM_ETH_FC_FORCE_FULL
;
112 static const char *mxge_fw_unaligned
= "mxge_ethp_z8e";
113 static const char *mxge_fw_aligned
= "mxge_eth_z8e";
114 static const char *mxge_fw_rss_aligned
= "mxge_rss_eth_z8e";
115 static const char *mxge_fw_rss_unaligned
= "mxge_rss_ethp_z8e";
117 TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices
);
118 TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay
);
119 TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable
);
120 TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware
);
121 TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait
);
122 TUNABLE_INT("hw.mxge.ticks", &mxge_ticks
);
123 TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc
);
124 TUNABLE_INT("hw.mxge.throttle", &mxge_throttle
);
125 TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx
);
126 TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss
);
127 TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable
);
128 TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable
);
129 TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl
, sizeof(mxge_flowctrl
));
131 static int mxge_probe(device_t dev
);
132 static int mxge_attach(device_t dev
);
133 static int mxge_detach(device_t dev
);
134 static int mxge_shutdown(device_t dev
);
136 static int mxge_alloc_intr(struct mxge_softc
*sc
);
137 static void mxge_free_intr(struct mxge_softc
*sc
);
138 static int mxge_setup_intr(struct mxge_softc
*sc
);
139 static void mxge_teardown_intr(struct mxge_softc
*sc
, int cnt
);
141 static device_method_t mxge_methods
[] = {
142 /* Device interface */
143 DEVMETHOD(device_probe
, mxge_probe
),
144 DEVMETHOD(device_attach
, mxge_attach
),
145 DEVMETHOD(device_detach
, mxge_detach
),
146 DEVMETHOD(device_shutdown
, mxge_shutdown
),
150 static driver_t mxge_driver
= {
153 sizeof(mxge_softc_t
),
156 static devclass_t mxge_devclass
;
158 /* Declare ourselves to be a child of the PCI bus.*/
159 DRIVER_MODULE(mxge
, pci
, mxge_driver
, mxge_devclass
, NULL
, NULL
);
160 MODULE_DEPEND(mxge
, firmware
, 1, 1, 1);
161 MODULE_DEPEND(mxge
, zlib
, 1, 1, 1);
163 static int mxge_load_firmware(mxge_softc_t
*sc
, int adopt
);
164 static int mxge_send_cmd(mxge_softc_t
*sc
, uint32_t cmd
, mxge_cmd_t
*data
);
165 static void mxge_close(mxge_softc_t
*sc
, int down
);
166 static int mxge_open(mxge_softc_t
*sc
);
167 static void mxge_tick(void *arg
);
168 static void mxge_watchdog_reset(mxge_softc_t
*sc
);
169 static void mxge_warn_stuck(mxge_softc_t
*sc
, mxge_tx_ring_t
*tx
, int slice
);
172 mxge_probe(device_t dev
)
174 if (pci_get_vendor(dev
) == MXGE_PCI_VENDOR_MYRICOM
&&
175 (pci_get_device(dev
) == MXGE_PCI_DEVICE_Z8E
||
176 pci_get_device(dev
) == MXGE_PCI_DEVICE_Z8E_9
)) {
177 int rev
= pci_get_revid(dev
);
180 case MXGE_PCI_REV_Z8E
:
181 device_set_desc(dev
, "Myri10G-PCIE-8A");
183 case MXGE_PCI_REV_Z8ES
:
184 device_set_desc(dev
, "Myri10G-PCIE-8B");
187 device_set_desc(dev
, "Myri10G-PCIE-8??");
188 device_printf(dev
, "Unrecognized rev %d NIC\n", rev
);
197 mxge_enable_wc(mxge_softc_t
*sc
)
199 #if defined(__x86_64__)
203 len
= rman_get_size(sc
->mem_res
);
204 pmap_change_attr((vm_offset_t
) sc
->sram
, len
/ PAGE_SIZE
,
205 PAT_WRITE_COMBINING
);
210 mxge_dma_alloc(mxge_softc_t
*sc
, bus_dmamem_t
*dma
, size_t bytes
,
211 bus_size_t alignment
)
216 if (bytes
> 4096 && alignment
== 4096)
221 err
= bus_dmamem_coherent(sc
->parent_dmat
, alignment
, boundary
,
222 BUS_SPACE_MAXADDR
, BUS_SPACE_MAXADDR
, bytes
,
223 BUS_DMA_WAITOK
| BUS_DMA_ZERO
, dma
);
225 device_printf(sc
->dev
, "bus_dmamem_coherent failed: %d\n", err
);
232 mxge_dma_free(bus_dmamem_t
*dma
)
234 bus_dmamap_unload(dma
->dmem_tag
, dma
->dmem_map
);
235 bus_dmamem_free(dma
->dmem_tag
, dma
->dmem_addr
, dma
->dmem_map
);
236 bus_dma_tag_destroy(dma
->dmem_tag
);
240 * The eeprom strings on the lanaiX have the format
246 mxge_parse_strings(mxge_softc_t
*sc
)
249 int i
, found_mac
, found_sn2
;
252 ptr
= sc
->eeprom_strings
;
255 while (*ptr
!= '\0') {
256 if (strncmp(ptr
, "MAC=", 4) == 0) {
259 sc
->mac_addr
[i
] = strtoul(ptr
, &endptr
, 16);
260 if (endptr
- ptr
!= 2)
269 } else if (strncmp(ptr
, "PC=", 3) == 0) {
271 strlcpy(sc
->product_code_string
, ptr
,
272 sizeof(sc
->product_code_string
));
273 } else if (!found_sn2
&& (strncmp(ptr
, "SN=", 3) == 0)) {
275 strlcpy(sc
->serial_number_string
, ptr
,
276 sizeof(sc
->serial_number_string
));
277 } else if (strncmp(ptr
, "SN2=", 4) == 0) {
278 /* SN2 takes precedence over SN */
281 strlcpy(sc
->serial_number_string
, ptr
,
282 sizeof(sc
->serial_number_string
));
284 while (*ptr
++ != '\0') {}
291 device_printf(sc
->dev
, "failed to parse eeprom_strings\n");
295 #if defined(__x86_64__)
298 mxge_enable_nvidia_ecrc(mxge_softc_t
*sc
)
301 unsigned long base
, off
;
303 device_t pdev
, mcp55
;
304 uint16_t vendor_id
, device_id
, word
;
305 uintptr_t bus
, slot
, func
, ivend
, idev
;
308 if (!mxge_nvidia_ecrc_enable
)
311 pdev
= device_get_parent(device_get_parent(sc
->dev
));
313 device_printf(sc
->dev
, "could not find parent?\n");
316 vendor_id
= pci_read_config(pdev
, PCIR_VENDOR
, 2);
317 device_id
= pci_read_config(pdev
, PCIR_DEVICE
, 2);
319 if (vendor_id
!= 0x10de)
324 if (device_id
== 0x005d) {
325 /* ck804, base address is magic */
327 } else if (device_id
>= 0x0374 && device_id
<= 0x378) {
328 /* mcp55, base address stored in chipset */
329 mcp55
= pci_find_bsf(0, 0, 0);
331 0x10de == pci_read_config(mcp55
, PCIR_VENDOR
, 2) &&
332 0x0369 == pci_read_config(mcp55
, PCIR_DEVICE
, 2)) {
333 word
= pci_read_config(mcp55
, 0x90, 2);
334 base
= ((unsigned long)word
& 0x7ffeU
) << 25;
342 * Test below is commented because it is believed that doing
343 * config read/write beyond 0xff will access the config space
344 * for the next larger function. Uncomment this and remove
345 * the hacky pmap_mapdev() way of accessing config space when
346 * DragonFly grows support for extended pcie config space access.
350 * See if we can, by some miracle, access the extended
353 val
= pci_read_config(pdev
, 0x178, 4);
354 if (val
!= 0xffffffff) {
356 pci_write_config(pdev
, 0x178, val
, 4);
361 * Rather than using normal pci config space writes, we must
362 * map the Nvidia config space ourselves. This is because on
363 * opteron/nvidia class machine the 0xe000000 mapping is
364 * handled by the nvidia chipset, that means the internal PCI
365 * device (the on-chip northbridge), or the amd-8131 bridge
366 * and things behind them are not visible by this method.
369 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
371 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
372 PCI_IVAR_SLOT
, &slot
);
373 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
374 PCI_IVAR_FUNCTION
, &func
);
375 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
376 PCI_IVAR_VENDOR
, &ivend
);
377 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
378 PCI_IVAR_DEVICE
, &idev
);
380 off
= base
+ 0x00100000UL
* (unsigned long)bus
+
381 0x00001000UL
* (unsigned long)(func
+ 8 * slot
);
383 /* map it into the kernel */
384 va
= pmap_mapdev(trunc_page((vm_paddr_t
)off
), PAGE_SIZE
);
386 device_printf(sc
->dev
, "pmap_kenter_temporary didn't\n");
389 /* get a pointer to the config space mapped into the kernel */
390 cfgptr
= va
+ (off
& PAGE_MASK
);
392 /* make sure that we can really access it */
393 vendor_id
= *(uint16_t *)(cfgptr
+ PCIR_VENDOR
);
394 device_id
= *(uint16_t *)(cfgptr
+ PCIR_DEVICE
);
395 if (!(vendor_id
== ivend
&& device_id
== idev
)) {
396 device_printf(sc
->dev
, "mapping failed: 0x%x:0x%x\n",
397 vendor_id
, device_id
);
398 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
402 ptr32
= (uint32_t*)(cfgptr
+ 0x178);
405 if (val
== 0xffffffff) {
406 device_printf(sc
->dev
, "extended mapping failed\n");
407 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
411 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
413 device_printf(sc
->dev
, "Enabled ECRC on upstream "
414 "Nvidia bridge at %d:%d:%d\n",
415 (int)bus
, (int)slot
, (int)func
);
419 #else /* __x86_64__ */
422 mxge_enable_nvidia_ecrc(mxge_softc_t
*sc
)
424 device_printf(sc
->dev
, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
430 mxge_dma_test(mxge_softc_t
*sc
, int test_type
)
433 bus_addr_t dmatest_bus
= sc
->dmabench_dma
.dmem_busaddr
;
436 const char *test
= " ";
439 * Run a small DMA test.
440 * The magic multipliers to the length tell the firmware
441 * to do DMA read, write, or read+write tests. The
442 * results are returned in cmd.data0. The upper 16
443 * bits of the return is the number of transfers completed.
444 * The lower 16 bits is the time in 0.5us ticks that the
445 * transfers took to complete.
448 len
= sc
->tx_boundary
;
450 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
451 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
452 cmd
.data2
= len
* 0x10000;
453 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
458 sc
->read_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
460 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
461 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
462 cmd
.data2
= len
* 0x1;
463 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
468 sc
->write_dma
= ((cmd
.data0
>>16) * len
* 2) / (cmd
.data0
& 0xffff);
470 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
471 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
472 cmd
.data2
= len
* 0x10001;
473 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
478 sc
->read_write_dma
= ((cmd
.data0
>>16) * len
* 2 * 2) /
479 (cmd
.data0
& 0xffff);
482 if (status
!= 0 && test_type
!= MXGEFW_CMD_UNALIGNED_TEST
) {
483 device_printf(sc
->dev
, "DMA %s benchmark failed: %d\n",
490 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
491 * when the PCI-E Completion packets are aligned on an 8-byte
492 * boundary. Some PCI-E chip sets always align Completion packets; on
493 * the ones that do not, the alignment can be enforced by enabling
494 * ECRC generation (if supported).
496 * When PCI-E Completion packets are not aligned, it is actually more
497 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
499 * If the driver can neither enable ECRC nor verify that it has
500 * already been enabled, then it must use a firmware image which works
501 * around unaligned completion packets (ethp_z8e.dat), and it should
502 * also ensure that it never gives the device a Read-DMA which is
503 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
504 * enabled, then the driver should use the aligned (eth_z8e.dat)
505 * firmware image, and set tx_boundary to 4KB.
508 mxge_firmware_probe(mxge_softc_t
*sc
)
510 device_t dev
= sc
->dev
;
514 sc
->tx_boundary
= 4096;
517 * Verify the max read request size was set to 4KB
518 * before trying the test with 4KB.
520 if (pci_find_extcap(dev
, PCIY_EXPRESS
, ®
) == 0) {
521 pectl
= pci_read_config(dev
, reg
+ 0x8, 2);
522 if ((pectl
& (5 << 12)) != (5 << 12)) {
523 device_printf(dev
, "Max Read Req. size != 4k (0x%x)\n",
525 sc
->tx_boundary
= 2048;
530 * Load the optimized firmware (which assumes aligned PCIe
531 * completions) in order to see if it works on this host.
533 sc
->fw_name
= mxge_fw_aligned
;
534 status
= mxge_load_firmware(sc
, 1);
539 * Enable ECRC if possible
541 mxge_enable_nvidia_ecrc(sc
);
544 * Run a DMA test which watches for unaligned completions and
545 * aborts on the first one seen. Not required on Z8ES or newer.
547 if (pci_get_revid(sc
->dev
) >= MXGE_PCI_REV_Z8ES
)
550 status
= mxge_dma_test(sc
, MXGEFW_CMD_UNALIGNED_TEST
);
552 return 0; /* keep the aligned firmware */
555 device_printf(dev
, "DMA test failed: %d\n", status
);
556 if (status
== ENOSYS
) {
557 device_printf(dev
, "Falling back to ethp! "
558 "Please install up to date fw\n");
564 mxge_select_firmware(mxge_softc_t
*sc
)
567 int force_firmware
= mxge_force_firmware
;
570 force_firmware
= sc
->throttle
;
572 if (force_firmware
!= 0) {
573 if (force_firmware
== 1)
578 device_printf(sc
->dev
,
579 "Assuming %s completions (forced)\n",
580 aligned
? "aligned" : "unaligned");
586 * If the PCIe link width is 4 or less, we can use the aligned
587 * firmware and skip any checks
589 if (sc
->link_width
!= 0 && sc
->link_width
<= 4) {
590 device_printf(sc
->dev
, "PCIe x%d Link, "
591 "expect reduced performance\n", sc
->link_width
);
596 if (mxge_firmware_probe(sc
) == 0)
601 sc
->fw_name
= mxge_fw_aligned
;
602 sc
->tx_boundary
= 4096;
604 sc
->fw_name
= mxge_fw_unaligned
;
605 sc
->tx_boundary
= 2048;
607 return mxge_load_firmware(sc
, 0);
611 mxge_validate_firmware(mxge_softc_t
*sc
, const mcp_gen_header_t
*hdr
)
613 if (be32toh(hdr
->mcp_type
) != MCP_TYPE_ETH
) {
614 if_printf(sc
->ifp
, "Bad firmware type: 0x%x\n",
615 be32toh(hdr
->mcp_type
));
619 /* Save firmware version for sysctl */
620 strlcpy(sc
->fw_version
, hdr
->version
, sizeof(sc
->fw_version
));
622 if_printf(sc
->ifp
, "firmware id: %s\n", hdr
->version
);
624 ksscanf(sc
->fw_version
, "%d.%d.%d", &sc
->fw_ver_major
,
625 &sc
->fw_ver_minor
, &sc
->fw_ver_tiny
);
627 if (!(sc
->fw_ver_major
== MXGEFW_VERSION_MAJOR
&&
628 sc
->fw_ver_minor
== MXGEFW_VERSION_MINOR
)) {
629 if_printf(sc
->ifp
, "Found firmware version %s\n",
631 if_printf(sc
->ifp
, "Driver needs %d.%d\n",
632 MXGEFW_VERSION_MAJOR
, MXGEFW_VERSION_MINOR
);
639 z_alloc(void *nil
, u_int items
, u_int size
)
641 return kmalloc(items
* size
, M_TEMP
, M_WAITOK
);
645 z_free(void *nil
, void *ptr
)
651 mxge_load_firmware_helper(mxge_softc_t
*sc
, uint32_t *limit
)
654 char *inflate_buffer
;
655 const struct firmware
*fw
;
656 const mcp_gen_header_t
*hdr
;
663 fw
= firmware_get(sc
->fw_name
);
665 if_printf(sc
->ifp
, "Could not find firmware image %s\n",
670 /* Setup zlib and decompress f/w */
671 bzero(&zs
, sizeof(zs
));
674 status
= inflateInit(&zs
);
675 if (status
!= Z_OK
) {
681 * The uncompressed size is stored as the firmware version,
682 * which would otherwise go unused
684 fw_len
= (size_t)fw
->version
;
685 inflate_buffer
= kmalloc(fw_len
, M_TEMP
, M_WAITOK
);
686 zs
.avail_in
= fw
->datasize
;
687 zs
.next_in
= __DECONST(char *, fw
->data
);
688 zs
.avail_out
= fw_len
;
689 zs
.next_out
= inflate_buffer
;
690 status
= inflate(&zs
, Z_FINISH
);
691 if (status
!= Z_STREAM_END
) {
692 if_printf(sc
->ifp
, "zlib %d\n", status
);
694 goto abort_with_buffer
;
699 htobe32(*(const uint32_t *)(inflate_buffer
+ MCP_HEADER_PTR_OFFSET
));
700 if ((hdr_offset
& 3) || hdr_offset
+ sizeof(*hdr
) > fw_len
) {
701 if_printf(sc
->ifp
, "Bad firmware file");
703 goto abort_with_buffer
;
705 hdr
= (const void*)(inflate_buffer
+ hdr_offset
);
707 status
= mxge_validate_firmware(sc
, hdr
);
709 goto abort_with_buffer
;
711 /* Copy the inflated firmware to NIC SRAM. */
712 for (i
= 0; i
< fw_len
; i
+= 256) {
713 mxge_pio_copy(sc
->sram
+ MXGE_FW_OFFSET
+ i
, inflate_buffer
+ i
,
714 min(256U, (unsigned)(fw_len
- i
)));
723 kfree(inflate_buffer
, M_TEMP
);
726 firmware_put(fw
, FIRMWARE_UNLOAD
);
731 * Enable or disable periodic RDMAs from the host to make certain
732 * chipsets resend dropped PCIe messages
735 mxge_dummy_rdma(mxge_softc_t
*sc
, int enable
)
738 volatile uint32_t *confirm
;
739 volatile char *submit
;
740 uint32_t *buf
, dma_low
, dma_high
;
743 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
745 /* Clear confirmation addr */
746 confirm
= (volatile uint32_t *)sc
->cmd
;
751 * Send an rdma command to the PCIe engine, and wait for the
752 * response in the confirmation address. The firmware should
753 * write a -1 there to indicate it is alive and well
755 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
756 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
757 buf
[0] = htobe32(dma_high
); /* confirm addr MSW */
758 buf
[1] = htobe32(dma_low
); /* confirm addr LSW */
759 buf
[2] = htobe32(0xffffffff); /* confirm data */
760 dma_low
= MXGE_LOWPART_TO_U32(sc
->zeropad_dma
.dmem_busaddr
);
761 dma_high
= MXGE_HIGHPART_TO_U32(sc
->zeropad_dma
.dmem_busaddr
);
762 buf
[3] = htobe32(dma_high
); /* dummy addr MSW */
763 buf
[4] = htobe32(dma_low
); /* dummy addr LSW */
764 buf
[5] = htobe32(enable
); /* enable? */
766 submit
= (volatile char *)(sc
->sram
+ MXGEFW_BOOT_DUMMY_RDMA
);
768 mxge_pio_copy(submit
, buf
, 64);
773 while (*confirm
!= 0xffffffff && i
< 20) {
777 if (*confirm
!= 0xffffffff) {
778 if_printf(sc
->ifp
, "dummy rdma %s failed (%p = 0x%x)",
779 (enable
? "enable" : "disable"), confirm
, *confirm
);
784 mxge_send_cmd(mxge_softc_t
*sc
, uint32_t cmd
, mxge_cmd_t
*data
)
787 char buf_bytes
[sizeof(*buf
) + 8];
788 volatile mcp_cmd_response_t
*response
= sc
->cmd
;
789 volatile char *cmd_addr
= sc
->sram
+ MXGEFW_ETH_CMD
;
790 uint32_t dma_low
, dma_high
;
791 int err
, sleep_total
= 0;
793 /* Ensure buf is aligned to 8 bytes */
794 buf
= (mcp_cmd_t
*)((unsigned long)(buf_bytes
+ 7) & ~7UL);
796 buf
->data0
= htobe32(data
->data0
);
797 buf
->data1
= htobe32(data
->data1
);
798 buf
->data2
= htobe32(data
->data2
);
799 buf
->cmd
= htobe32(cmd
);
800 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
801 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
803 buf
->response_addr
.low
= htobe32(dma_low
);
804 buf
->response_addr
.high
= htobe32(dma_high
);
806 response
->result
= 0xffffffff;
808 mxge_pio_copy((volatile void *)cmd_addr
, buf
, sizeof (*buf
));
814 for (sleep_total
= 0; sleep_total
< 20; sleep_total
++) {
816 switch (be32toh(response
->result
)) {
818 data
->data0
= be32toh(response
->data
);
824 case MXGEFW_CMD_UNKNOWN
:
827 case MXGEFW_CMD_ERROR_UNALIGNED
:
830 case MXGEFW_CMD_ERROR_BUSY
:
833 case MXGEFW_CMD_ERROR_I2C_ABSENT
:
837 if_printf(sc
->ifp
, "command %d failed, result = %d\n",
838 cmd
, be32toh(response
->result
));
846 if_printf(sc
->ifp
, "command %d timed out result = %d\n",
847 cmd
, be32toh(response
->result
));
853 mxge_adopt_running_firmware(mxge_softc_t
*sc
)
855 struct mcp_gen_header
*hdr
;
856 const size_t bytes
= sizeof(struct mcp_gen_header
);
861 * Find running firmware header
864 htobe32(*(volatile uint32_t *)(sc
->sram
+ MCP_HEADER_PTR_OFFSET
));
866 if ((hdr_offset
& 3) || hdr_offset
+ sizeof(*hdr
) > sc
->sram_size
) {
867 if_printf(sc
->ifp
, "Running firmware has bad header offset "
868 "(%zu)\n", hdr_offset
);
873 * Copy header of running firmware from SRAM to host memory to
876 hdr
= kmalloc(bytes
, M_DEVBUF
, M_WAITOK
);
877 bus_space_read_region_1(rman_get_bustag(sc
->mem_res
),
878 rman_get_bushandle(sc
->mem_res
), hdr_offset
, (char *)hdr
, bytes
);
879 status
= mxge_validate_firmware(sc
, hdr
);
880 kfree(hdr
, M_DEVBUF
);
883 * Check to see if adopted firmware has bug where adopting
884 * it will cause broadcasts to be filtered unless the NIC
885 * is kept in ALLMULTI mode
887 if (sc
->fw_ver_major
== 1 && sc
->fw_ver_minor
== 4 &&
888 sc
->fw_ver_tiny
>= 4 && sc
->fw_ver_tiny
<= 11) {
889 sc
->adopted_rx_filter_bug
= 1;
890 if_printf(sc
->ifp
, "Adopting fw %d.%d.%d: "
891 "working around rx filter bug\n",
892 sc
->fw_ver_major
, sc
->fw_ver_minor
, sc
->fw_ver_tiny
);
899 mxge_load_firmware(mxge_softc_t
*sc
, int adopt
)
901 volatile uint32_t *confirm
;
902 volatile char *submit
;
904 uint32_t *buf
, size
, dma_low
, dma_high
;
907 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
909 size
= sc
->sram_size
;
910 status
= mxge_load_firmware_helper(sc
, &size
);
916 * Try to use the currently running firmware, if
919 status
= mxge_adopt_running_firmware(sc
);
922 "failed to adopt running firmware\n");
925 if_printf(sc
->ifp
, "Successfully adopted running firmware\n");
927 if (sc
->tx_boundary
== 4096) {
929 "Using firmware currently running on NIC. "
931 if_printf(sc
->ifp
, "performance consider loading "
932 "optimized firmware\n");
934 sc
->fw_name
= mxge_fw_unaligned
;
935 sc
->tx_boundary
= 2048;
939 /* Clear confirmation addr */
940 confirm
= (volatile uint32_t *)sc
->cmd
;
945 * Send a reload command to the bootstrap MCP, and wait for the
946 * response in the confirmation address. The firmware should
947 * write a -1 there to indicate it is alive and well
950 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
951 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.dmem_busaddr
);
953 buf
[0] = htobe32(dma_high
); /* confirm addr MSW */
954 buf
[1] = htobe32(dma_low
); /* confirm addr LSW */
955 buf
[2] = htobe32(0xffffffff); /* confirm data */
958 * FIX: All newest firmware should un-protect the bottom of
959 * the sram before handoff. However, the very first interfaces
960 * do not. Therefore the handoff copy must skip the first 8 bytes
962 /* where the code starts*/
963 buf
[3] = htobe32(MXGE_FW_OFFSET
+ 8);
964 buf
[4] = htobe32(size
- 8); /* length of code */
965 buf
[5] = htobe32(8); /* where to copy to */
966 buf
[6] = htobe32(0); /* where to jump to */
968 submit
= (volatile char *)(sc
->sram
+ MXGEFW_BOOT_HANDOFF
);
969 mxge_pio_copy(submit
, buf
, 64);
974 while (*confirm
!= 0xffffffff && i
< 20) {
978 if (*confirm
!= 0xffffffff) {
979 if_printf(sc
->ifp
,"handoff failed (%p = 0x%x)",
987 mxge_update_mac_address(mxge_softc_t
*sc
)
990 uint8_t *addr
= sc
->mac_addr
;
992 cmd
.data0
= (addr
[0] << 24) | (addr
[1] << 16) |
993 (addr
[2] << 8) | addr
[3];
994 cmd
.data1
= (addr
[4] << 8) | (addr
[5]);
995 return mxge_send_cmd(sc
, MXGEFW_SET_MAC_ADDRESS
, &cmd
);
999 mxge_change_pause(mxge_softc_t
*sc
, int pause
)
1004 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
1006 status
= mxge_send_cmd(sc
, MXGEFW_ENABLE_FLOW_CONTROL
, &cmd
);
1008 status
= mxge_send_cmd(sc
, MXGEFW_DISABLE_FLOW_CONTROL
, &cmd
);
1010 if_printf(sc
->ifp
, "Failed to set flow control mode\n");
1018 mxge_change_promisc(mxge_softc_t
*sc
, int promisc
)
1023 bzero(&cmd
, sizeof(cmd
)); /* avoid gcc warning */
1024 if (mxge_always_promisc
)
1028 status
= mxge_send_cmd(sc
, MXGEFW_ENABLE_PROMISC
, &cmd
);
1030 status
= mxge_send_cmd(sc
, MXGEFW_DISABLE_PROMISC
, &cmd
);
1032 if_printf(sc
->ifp
, "Failed to set promisc mode\n");
1036 mxge_set_multicast_list(mxge_softc_t
*sc
)
1039 struct ifmultiaddr
*ifma
;
1040 struct ifnet
*ifp
= sc
->ifp
;
1043 /* This firmware is known to not support multicast */
1044 if (!sc
->fw_multicast_support
)
1047 /* Disable multicast filtering while we play with the lists*/
1048 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
1049 err
= mxge_send_cmd(sc
, MXGEFW_ENABLE_ALLMULTI
, &cmd
);
1051 if_printf(ifp
, "Failed MXGEFW_ENABLE_ALLMULTI, "
1052 "error status: %d\n", err
);
1056 if (sc
->adopted_rx_filter_bug
)
1059 if (ifp
->if_flags
& IFF_ALLMULTI
) {
1060 /* Request to disable multicast filtering, so quit here */
1064 /* Flush all the filters */
1065 err
= mxge_send_cmd(sc
, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS
, &cmd
);
1067 if_printf(ifp
, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
1068 "error status: %d\n", err
);
1073 * Walk the multicast list, and add each address
1075 TAILQ_FOREACH(ifma
, &ifp
->if_multiaddrs
, ifma_link
) {
1076 if (ifma
->ifma_addr
->sa_family
!= AF_LINK
)
1079 bcopy(LLADDR((struct sockaddr_dl
*)ifma
->ifma_addr
),
1081 bcopy(LLADDR((struct sockaddr_dl
*)ifma
->ifma_addr
) + 4,
1083 cmd
.data0
= htonl(cmd
.data0
);
1084 cmd
.data1
= htonl(cmd
.data1
);
1085 err
= mxge_send_cmd(sc
, MXGEFW_JOIN_MULTICAST_GROUP
, &cmd
);
1087 if_printf(ifp
, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
1088 "error status: %d\n", err
);
1089 /* Abort, leaving multicast filtering off */
1094 /* Enable multicast filtering */
1095 err
= mxge_send_cmd(sc
, MXGEFW_DISABLE_ALLMULTI
, &cmd
);
1097 if_printf(ifp
, "Failed MXGEFW_DISABLE_ALLMULTI, "
1098 "error status: %d\n", err
);
1104 mxge_max_mtu(mxge_softc_t
*sc
)
1109 if (MJUMPAGESIZE
- MXGEFW_PAD
> MXGEFW_MAX_MTU
)
1110 return MXGEFW_MAX_MTU
- MXGEFW_PAD
;
1112 /* try to set nbufs to see if it we can
1113 use virtually contiguous jumbos */
1115 status
= mxge_send_cmd(sc
, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS
,
1118 return MXGEFW_MAX_MTU
- MXGEFW_PAD
;
1120 /* otherwise, we're limited to MJUMPAGESIZE */
1121 return MJUMPAGESIZE
- MXGEFW_PAD
;
1126 mxge_reset(mxge_softc_t
*sc
, int interrupts_setup
)
1128 struct mxge_slice_state
*ss
;
1129 mxge_rx_done_t
*rx_done
;
1130 volatile uint32_t *irq_claim
;
1132 int slice
, status
, rx_intr_size
;
1135 * Try to send a reset command to the card to see if it
1138 memset(&cmd
, 0, sizeof (cmd
));
1139 status
= mxge_send_cmd(sc
, MXGEFW_CMD_RESET
, &cmd
);
1141 if_printf(sc
->ifp
, "failed reset\n");
1145 mxge_dummy_rdma(sc
, 1);
1148 * Set the intrq size
1149 * XXX assume 4byte mcp_slot
1151 rx_intr_size
= sc
->rx_intr_slots
* sizeof(mcp_slot_t
);
1152 cmd
.data0
= rx_intr_size
;
1153 status
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
1156 * Even though we already know how many slices are supported
1157 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1158 * has magic side effects, and must be called after a reset.
1159 * It must be called prior to calling any RSS related cmds,
1160 * including assigning an interrupt queue for anything but
1161 * slice 0. It must also be called *after*
1162 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1163 * the firmware to compute offsets.
1165 if (sc
->num_slices
> 1) {
1166 /* Ask the maximum number of slices it supports */
1167 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
, &cmd
);
1169 if_printf(sc
->ifp
, "failed to get number of slices\n");
1174 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1175 * to setting up the interrupt queue DMA
1177 cmd
.data0
= sc
->num_slices
;
1178 cmd
.data1
= MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE
;
1179 if (sc
->num_tx_rings
> 1)
1180 cmd
.data1
|= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES
;
1181 status
= mxge_send_cmd(sc
, MXGEFW_CMD_ENABLE_RSS_QUEUES
, &cmd
);
1183 if_printf(sc
->ifp
, "failed to set number of slices\n");
1188 if (interrupts_setup
) {
1189 /* Now exchange information about interrupts */
1190 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1191 ss
= &sc
->ss
[slice
];
1193 rx_done
= &ss
->rx_data
.rx_done
;
1194 memset(rx_done
->entry
, 0, rx_intr_size
);
1197 MXGE_LOWPART_TO_U32(ss
->rx_done_dma
.dmem_busaddr
);
1199 MXGE_HIGHPART_TO_U32(ss
->rx_done_dma
.dmem_busaddr
);
1201 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_DMA
,
1206 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET
,
1208 sc
->intr_coal_delay_ptr
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1210 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_IRQ_ACK_OFFSET
, &cmd
);
1211 irq_claim
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1213 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET
, &cmd
);
1214 sc
->irq_deassert
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1217 if_printf(sc
->ifp
, "failed set interrupt parameters\n");
1221 *sc
->intr_coal_delay_ptr
= htobe32(sc
->intr_coal_delay
);
1223 /* Run a DMA benchmark */
1224 mxge_dma_test(sc
, MXGEFW_DMA_TEST
);
1226 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1227 ss
= &sc
->ss
[slice
];
1229 ss
->irq_claim
= irq_claim
+ (2 * slice
);
1231 /* Reset mcp/driver shared state back to 0 */
1232 ss
->rx_data
.rx_done
.idx
= 0;
1235 ss
->tx
.pkt_done
= 0;
1236 ss
->tx
.queue_active
= 0;
1237 ss
->tx
.activate
= 0;
1238 ss
->tx
.deactivate
= 0;
1239 ss
->rx_data
.rx_big
.cnt
= 0;
1240 ss
->rx_data
.rx_small
.cnt
= 0;
1241 if (ss
->fw_stats
!= NULL
)
1242 bzero(ss
->fw_stats
, sizeof(*ss
->fw_stats
));
1244 sc
->rdma_tags_available
= 15;
1246 status
= mxge_update_mac_address(sc
);
1247 mxge_change_promisc(sc
, sc
->ifp
->if_flags
& IFF_PROMISC
);
1248 mxge_change_pause(sc
, sc
->pause
);
1249 mxge_set_multicast_list(sc
);
1252 cmd
.data0
= sc
->throttle
;
1253 if (mxge_send_cmd(sc
, MXGEFW_CMD_SET_THROTTLE_FACTOR
, &cmd
))
1254 if_printf(sc
->ifp
, "can't enable throttle\n");
1260 mxge_change_throttle(SYSCTL_HANDLER_ARGS
)
1265 unsigned int throttle
;
1268 throttle
= sc
->throttle
;
1269 err
= sysctl_handle_int(oidp
, &throttle
, arg2
, req
);
1273 if (throttle
== sc
->throttle
)
1276 if (throttle
< MXGE_MIN_THROTTLE
|| throttle
> MXGE_MAX_THROTTLE
)
1279 ifnet_serialize_all(sc
->ifp
);
1281 cmd
.data0
= throttle
;
1282 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_THROTTLE_FACTOR
, &cmd
);
1284 sc
->throttle
= throttle
;
1286 ifnet_deserialize_all(sc
->ifp
);
1291 mxge_change_use_rss(SYSCTL_HANDLER_ARGS
)
1297 use_rss
= sc
->use_rss
;
1298 err
= sysctl_handle_int(oidp
, &use_rss
, arg2
, req
);
1302 if (use_rss
== sc
->use_rss
)
1305 ifnet_serialize_all(sc
->ifp
);
1307 sc
->use_rss
= use_rss
;
1308 if (sc
->ifp
->if_flags
& IFF_RUNNING
) {
1313 ifnet_deserialize_all(sc
->ifp
);
1318 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS
)
1321 unsigned int intr_coal_delay
;
1325 intr_coal_delay
= sc
->intr_coal_delay
;
1326 err
= sysctl_handle_int(oidp
, &intr_coal_delay
, arg2
, req
);
1330 if (intr_coal_delay
== sc
->intr_coal_delay
)
1333 if (intr_coal_delay
== 0 || intr_coal_delay
> 1000*1000)
1336 ifnet_serialize_all(sc
->ifp
);
1338 *sc
->intr_coal_delay_ptr
= htobe32(intr_coal_delay
);
1339 sc
->intr_coal_delay
= intr_coal_delay
;
1341 ifnet_deserialize_all(sc
->ifp
);
1346 mxge_handle_be32(SYSCTL_HANDLER_ARGS
)
1352 arg2
= be32toh(*(int *)arg1
);
1354 err
= sysctl_handle_int(oidp
, arg1
, arg2
, req
);
1360 mxge_rem_sysctls(mxge_softc_t
*sc
)
1362 if (sc
->ss
!= NULL
) {
1363 struct mxge_slice_state
*ss
;
1366 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1367 ss
= &sc
->ss
[slice
];
1368 if (ss
->sysctl_tree
!= NULL
) {
1369 sysctl_ctx_free(&ss
->sysctl_ctx
);
1370 ss
->sysctl_tree
= NULL
;
1375 if (sc
->slice_sysctl_tree
!= NULL
) {
1376 sysctl_ctx_free(&sc
->slice_sysctl_ctx
);
1377 sc
->slice_sysctl_tree
= NULL
;
1382 mxge_add_sysctls(mxge_softc_t
*sc
)
1384 struct sysctl_ctx_list
*ctx
;
1385 struct sysctl_oid_list
*children
;
1387 struct mxge_slice_state
*ss
;
1391 ctx
= device_get_sysctl_ctx(sc
->dev
);
1392 children
= SYSCTL_CHILDREN(device_get_sysctl_tree(sc
->dev
));
1393 fw
= sc
->ss
[0].fw_stats
;
1396 * Random information
1398 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
, "firmware_version",
1399 CTLFLAG_RD
, &sc
->fw_version
, 0, "firmware version");
1401 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
, "serial_number",
1402 CTLFLAG_RD
, &sc
->serial_number_string
, 0, "serial number");
1404 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
, "product_code",
1405 CTLFLAG_RD
, &sc
->product_code_string
, 0, "product code");
1407 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "pcie_link_width",
1408 CTLFLAG_RD
, &sc
->link_width
, 0, "link width");
1410 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_boundary",
1411 CTLFLAG_RD
, &sc
->tx_boundary
, 0, "tx boundary");
1413 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "write_combine",
1414 CTLFLAG_RD
, &sc
->wc
, 0, "write combining PIO");
1416 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "read_dma_MBs",
1417 CTLFLAG_RD
, &sc
->read_dma
, 0, "DMA Read speed in MB/s");
1419 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "write_dma_MBs",
1420 CTLFLAG_RD
, &sc
->write_dma
, 0, "DMA Write speed in MB/s");
1422 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "read_write_dma_MBs",
1423 CTLFLAG_RD
, &sc
->read_write_dma
, 0,
1424 "DMA concurrent Read/Write speed in MB/s");
1426 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "watchdog_resets",
1427 CTLFLAG_RD
, &sc
->watchdog_resets
, 0,
1428 "Number of times NIC was reset");
1431 * Performance related tunables
1433 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "intr_coal_delay",
1434 CTLTYPE_INT
|CTLFLAG_RW
, sc
, 0, mxge_change_intr_coal
, "I",
1435 "Interrupt coalescing delay in usecs");
1437 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "throttle",
1438 CTLTYPE_INT
|CTLFLAG_RW
, sc
, 0, mxge_change_throttle
, "I",
1439 "Transmit throttling");
1441 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "use_rss",
1442 CTLTYPE_INT
|CTLFLAG_RW
, sc
, 0, mxge_change_use_rss
, "I",
1445 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "deassert_wait",
1446 CTLFLAG_RW
, &mxge_deassert_wait
, 0,
1447 "Wait for IRQ line to go low in ihandler");
1450 * Stats block from firmware is in network byte order.
1453 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "link_up",
1454 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->link_up
, 0,
1455 mxge_handle_be32
, "I", "link up");
1457 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "rdma_tags_available",
1458 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->rdma_tags_available
, 0,
1459 mxge_handle_be32
, "I", "rdma_tags_available");
1461 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_bad_crc32",
1462 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_bad_crc32
, 0,
1463 mxge_handle_be32
, "I", "dropped_bad_crc32");
1465 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_bad_phy",
1466 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_bad_phy
, 0,
1467 mxge_handle_be32
, "I", "dropped_bad_phy");
1469 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_link_error_or_filtered",
1470 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_link_error_or_filtered
, 0,
1471 mxge_handle_be32
, "I", "dropped_link_error_or_filtered");
1473 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_link_overflow",
1474 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_link_overflow
, 0,
1475 mxge_handle_be32
, "I", "dropped_link_overflow");
1477 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_multicast_filtered",
1478 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_multicast_filtered
, 0,
1479 mxge_handle_be32
, "I", "dropped_multicast_filtered");
1481 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_no_big_buffer",
1482 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_no_big_buffer
, 0,
1483 mxge_handle_be32
, "I", "dropped_no_big_buffer");
1485 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_no_small_buffer",
1486 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_no_small_buffer
, 0,
1487 mxge_handle_be32
, "I", "dropped_no_small_buffer");
1489 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_overrun",
1490 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_overrun
, 0,
1491 mxge_handle_be32
, "I", "dropped_overrun");
1493 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_pause",
1494 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_pause
, 0,
1495 mxge_handle_be32
, "I", "dropped_pause");
1497 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_runt",
1498 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_runt
, 0,
1499 mxge_handle_be32
, "I", "dropped_runt");
1501 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
, "dropped_unicast_filtered",
1502 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_unicast_filtered
, 0,
1503 mxge_handle_be32
, "I", "dropped_unicast_filtered");
1505 /* add counters exported for debugging from all slices */
1506 sysctl_ctx_init(&sc
->slice_sysctl_ctx
);
1507 sc
->slice_sysctl_tree
= SYSCTL_ADD_NODE(&sc
->slice_sysctl_ctx
,
1508 children
, OID_AUTO
, "slice", CTLFLAG_RD
, 0, "");
1509 if (sc
->slice_sysctl_tree
== NULL
) {
1510 device_printf(sc
->dev
, "can't add slice sysctl node\n");
1514 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1515 ss
= &sc
->ss
[slice
];
1516 sysctl_ctx_init(&ss
->sysctl_ctx
);
1517 ctx
= &ss
->sysctl_ctx
;
1518 children
= SYSCTL_CHILDREN(sc
->slice_sysctl_tree
);
1519 ksprintf(slice_num
, "%d", slice
);
1520 ss
->sysctl_tree
= SYSCTL_ADD_NODE(ctx
, children
, OID_AUTO
,
1521 slice_num
, CTLFLAG_RD
, 0, "");
1522 if (ss
->sysctl_tree
== NULL
) {
1523 device_printf(sc
->dev
,
1524 "can't add %d slice sysctl node\n", slice
);
1525 return; /* XXX continue? */
1527 children
= SYSCTL_CHILDREN(ss
->sysctl_tree
);
1530 * XXX change to ULONG
1533 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "rx_small_cnt",
1534 CTLFLAG_RD
, &ss
->rx_data
.rx_small
.cnt
, 0, "rx_small_cnt");
1536 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "rx_big_cnt",
1537 CTLFLAG_RD
, &ss
->rx_data
.rx_big
.cnt
, 0, "rx_small_cnt");
1539 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_req",
1540 CTLFLAG_RD
, &ss
->tx
.req
, 0, "tx_req");
1542 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_done",
1543 CTLFLAG_RD
, &ss
->tx
.done
, 0, "tx_done");
1545 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_pkt_done",
1546 CTLFLAG_RD
, &ss
->tx
.pkt_done
, 0, "tx_done");
1548 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_queue_active",
1549 CTLFLAG_RD
, &ss
->tx
.queue_active
, 0, "tx_queue_active");
1551 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_activate",
1552 CTLFLAG_RD
, &ss
->tx
.activate
, 0, "tx_activate");
1554 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
, "tx_deactivate",
1555 CTLFLAG_RD
, &ss
->tx
.deactivate
, 0, "tx_deactivate");
1560 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1561 * backwards one at a time and handle ring wraps
1563 static __inline
void
1564 mxge_submit_req_backwards(mxge_tx_ring_t
*tx
,
1565 mcp_kreq_ether_send_t
*src
, int cnt
)
1567 int idx
, starting_slot
;
1569 starting_slot
= tx
->req
;
1572 idx
= (starting_slot
+ cnt
) & tx
->mask
;
1573 mxge_pio_copy(&tx
->lanai
[idx
], &src
[cnt
], sizeof(*src
));
1579 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1580 * at most 32 bytes at a time, so as to avoid involving the software
1581 * pio handler in the nic. We re-write the first segment's flags
1582 * to mark them valid only after writing the entire chain
1584 static __inline
void
1585 mxge_submit_req(mxge_tx_ring_t
*tx
, mcp_kreq_ether_send_t
*src
, int cnt
)
1589 volatile uint32_t *dst_ints
;
1590 mcp_kreq_ether_send_t
*srcp
;
1591 volatile mcp_kreq_ether_send_t
*dstp
, *dst
;
1594 idx
= tx
->req
& tx
->mask
;
1596 last_flags
= src
->flags
;
1599 dst
= dstp
= &tx
->lanai
[idx
];
1602 if ((idx
+ cnt
) < tx
->mask
) {
1603 for (i
= 0; i
< cnt
- 1; i
+= 2) {
1604 mxge_pio_copy(dstp
, srcp
, 2 * sizeof(*src
));
1605 wmb(); /* force write every 32 bytes */
1611 * Submit all but the first request, and ensure
1612 * that it is submitted below
1614 mxge_submit_req_backwards(tx
, src
, cnt
);
1618 /* Submit the first request */
1619 mxge_pio_copy(dstp
, srcp
, sizeof(*src
));
1620 wmb(); /* barrier before setting valid flag */
1623 /* Re-write the last 32-bits with the valid flags */
1624 src
->flags
= last_flags
;
1625 src_ints
= (uint32_t *)src
;
1627 dst_ints
= (volatile uint32_t *)dst
;
1629 *dst_ints
= *src_ints
;
1635 mxge_pullup_tso(struct mbuf
**mp
)
1637 int hoff
, iphlen
, thoff
;
1641 KASSERT(M_WRITABLE(m
), ("TSO mbuf not writable"));
1643 iphlen
= m
->m_pkthdr
.csum_iphlen
;
1644 thoff
= m
->m_pkthdr
.csum_thlen
;
1645 hoff
= m
->m_pkthdr
.csum_lhlen
;
1647 KASSERT(iphlen
> 0, ("invalid ip hlen"));
1648 KASSERT(thoff
> 0, ("invalid tcp hlen"));
1649 KASSERT(hoff
> 0, ("invalid ether hlen"));
1651 if (__predict_false(m
->m_len
< hoff
+ iphlen
+ thoff
)) {
1652 m
= m_pullup(m
, hoff
+ iphlen
+ thoff
);
1663 mxge_encap_tso(mxge_tx_ring_t
*tx
, struct mxge_buffer_state
*info_map
,
1664 struct mbuf
*m
, int busdma_seg_cnt
)
1666 mcp_kreq_ether_send_t
*req
;
1667 bus_dma_segment_t
*seg
;
1668 uint32_t low
, high_swapped
;
1669 int len
, seglen
, cum_len
, cum_len_next
;
1670 int next_is_first
, chop
, cnt
, rdma_count
, small
;
1671 uint16_t pseudo_hdr_offset
, cksum_offset
, mss
;
1672 uint8_t flags
, flags_next
;
1673 struct mxge_buffer_state
*info_last
;
1674 bus_dmamap_t map
= info_map
->map
;
1676 mss
= m
->m_pkthdr
.tso_segsz
;
1679 * Negative cum_len signifies to the send loop that we are
1680 * still in the header portion of the TSO packet.
1682 cum_len
= -(m
->m_pkthdr
.csum_lhlen
+ m
->m_pkthdr
.csum_iphlen
+
1683 m
->m_pkthdr
.csum_thlen
);
1686 * TSO implies checksum offload on this hardware
1688 cksum_offset
= m
->m_pkthdr
.csum_lhlen
+ m
->m_pkthdr
.csum_iphlen
;
1689 flags
= MXGEFW_FLAGS_TSO_HDR
| MXGEFW_FLAGS_FIRST
;
1692 * For TSO, pseudo_hdr_offset holds mss. The firmware figures
1693 * out where to put the checksum by parsing the header.
1695 pseudo_hdr_offset
= htobe16(mss
);
1703 * "rdma_count" is the number of RDMAs belonging to the current
1704 * packet BEFORE the current send request. For non-TSO packets,
1705 * this is equal to "count".
1707 * For TSO packets, rdma_count needs to be reset to 0 after a
1710 * The rdma_count field of the send request is the number of
1711 * RDMAs of the packet starting at that request. For TSO send
1712 * requests with one ore more cuts in the middle, this is the
1713 * number of RDMAs starting after the last cut in the request.
1714 * All previous segments before the last cut implicitly have 1
1717 * Since the number of RDMAs is not known beforehand, it must be
1718 * filled-in retroactively - after each segmentation cut or at
1719 * the end of the entire packet.
1722 while (busdma_seg_cnt
) {
1724 * Break the busdma segment up into pieces
1726 low
= MXGE_LOWPART_TO_U32(seg
->ds_addr
);
1727 high_swapped
= htobe32(MXGE_HIGHPART_TO_U32(seg
->ds_addr
));
1731 flags_next
= flags
& ~MXGEFW_FLAGS_FIRST
;
1733 cum_len_next
= cum_len
+ seglen
;
1734 (req
- rdma_count
)->rdma_count
= rdma_count
+ 1;
1735 if (__predict_true(cum_len
>= 0)) {
1737 chop
= (cum_len_next
> mss
);
1738 cum_len_next
= cum_len_next
% mss
;
1739 next_is_first
= (cum_len_next
== 0);
1740 flags
|= chop
* MXGEFW_FLAGS_TSO_CHOP
;
1742 next_is_first
* MXGEFW_FLAGS_FIRST
;
1743 rdma_count
|= -(chop
| next_is_first
);
1744 rdma_count
+= chop
& !next_is_first
;
1745 } else if (cum_len_next
>= 0) {
1750 small
= (mss
<= MXGEFW_SEND_SMALL_SIZE
);
1751 flags_next
= MXGEFW_FLAGS_TSO_PLD
|
1752 MXGEFW_FLAGS_FIRST
|
1753 (small
* MXGEFW_FLAGS_SMALL
);
1756 req
->addr_high
= high_swapped
;
1757 req
->addr_low
= htobe32(low
);
1758 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
1760 req
->rdma_count
= 1;
1761 req
->length
= htobe16(seglen
);
1762 req
->cksum_offset
= cksum_offset
;
1764 flags
| ((cum_len
& 1) * MXGEFW_FLAGS_ALIGN_ODD
);
1767 cum_len
= cum_len_next
;
1772 if (__predict_false(cksum_offset
> seglen
))
1773 cksum_offset
-= seglen
;
1776 if (__predict_false(cnt
> tx
->max_desc
))
1782 (req
- rdma_count
)->rdma_count
= rdma_count
;
1786 req
->flags
|= MXGEFW_FLAGS_TSO_LAST
;
1787 } while (!(req
->flags
& (MXGEFW_FLAGS_TSO_CHOP
| MXGEFW_FLAGS_FIRST
)));
1789 info_last
= &tx
->info
[((cnt
- 1) + tx
->req
) & tx
->mask
];
1791 info_map
->map
= info_last
->map
;
1792 info_last
->map
= map
;
1795 mxge_submit_req(tx
, tx
->req_list
, cnt
);
1797 if (tx
->send_go
!= NULL
&& tx
->queue_active
== 0) {
1798 /* Tell the NIC to start polling this slice */
1800 tx
->queue_active
= 1;
1807 bus_dmamap_unload(tx
->dmat
, tx
->info
[tx
->req
& tx
->mask
].map
);
1813 mxge_encap(mxge_tx_ring_t
*tx
, struct mbuf
*m
, bus_addr_t zeropad
)
1815 mcp_kreq_ether_send_t
*req
;
1816 bus_dma_segment_t
*seg
;
1818 int cnt
, cum_len
, err
, i
, idx
, odd_flag
;
1819 uint16_t pseudo_hdr_offset
;
1820 uint8_t flags
, cksum_offset
;
1821 struct mxge_buffer_state
*info_map
, *info_last
;
1823 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO
) {
1824 err
= mxge_pullup_tso(&m
);
1825 if (__predict_false(err
))
1830 * Map the frame for DMA
1832 idx
= tx
->req
& tx
->mask
;
1833 info_map
= &tx
->info
[idx
];
1834 map
= info_map
->map
;
1836 err
= bus_dmamap_load_mbuf_defrag(tx
->dmat
, map
, &m
,
1837 tx
->seg_list
, tx
->max_desc
- 2, &cnt
, BUS_DMA_NOWAIT
);
1838 if (__predict_false(err
!= 0))
1840 bus_dmamap_sync(tx
->dmat
, map
, BUS_DMASYNC_PREWRITE
);
1843 * TSO is different enough, we handle it in another routine
1845 if (m
->m_pkthdr
.csum_flags
& CSUM_TSO
)
1846 return mxge_encap_tso(tx
, info_map
, m
, cnt
);
1850 pseudo_hdr_offset
= 0;
1851 flags
= MXGEFW_FLAGS_NO_TSO
;
1854 * Checksum offloading
1856 if (m
->m_pkthdr
.csum_flags
& CSUM_DELAY_DATA
) {
1857 cksum_offset
= m
->m_pkthdr
.csum_lhlen
+ m
->m_pkthdr
.csum_iphlen
;
1858 pseudo_hdr_offset
= cksum_offset
+ m
->m_pkthdr
.csum_data
;
1859 pseudo_hdr_offset
= htobe16(pseudo_hdr_offset
);
1860 req
->cksum_offset
= cksum_offset
;
1861 flags
|= MXGEFW_FLAGS_CKSUM
;
1862 odd_flag
= MXGEFW_FLAGS_ALIGN_ODD
;
1866 if (m
->m_pkthdr
.len
< MXGEFW_SEND_SMALL_SIZE
)
1867 flags
|= MXGEFW_FLAGS_SMALL
;
1870 * Convert segments into a request list
1874 req
->flags
= MXGEFW_FLAGS_FIRST
;
1875 for (i
= 0; i
< cnt
; i
++) {
1876 req
->addr_low
= htobe32(MXGE_LOWPART_TO_U32(seg
->ds_addr
));
1877 req
->addr_high
= htobe32(MXGE_HIGHPART_TO_U32(seg
->ds_addr
));
1878 req
->length
= htobe16(seg
->ds_len
);
1879 req
->cksum_offset
= cksum_offset
;
1880 if (cksum_offset
> seg
->ds_len
)
1881 cksum_offset
-= seg
->ds_len
;
1884 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
1885 req
->pad
= 0; /* complete solid 16-byte block */
1886 req
->rdma_count
= 1;
1887 req
->flags
|= flags
| ((cum_len
& 1) * odd_flag
);
1888 cum_len
+= seg
->ds_len
;
1896 * Pad runt to 60 bytes
1900 req
->addr_low
= htobe32(MXGE_LOWPART_TO_U32(zeropad
));
1901 req
->addr_high
= htobe32(MXGE_HIGHPART_TO_U32(zeropad
));
1902 req
->length
= htobe16(60 - cum_len
);
1903 req
->cksum_offset
= 0;
1904 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
1905 req
->pad
= 0; /* complete solid 16-byte block */
1906 req
->rdma_count
= 1;
1907 req
->flags
|= flags
| ((cum_len
& 1) * odd_flag
);
1911 tx
->req_list
[0].rdma_count
= cnt
;
1913 /* print what the firmware will see */
1914 for (i
= 0; i
< cnt
; i
++) {
1915 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
1916 "cso:%d, flags:0x%x, rdma:%d\n",
1917 i
, (int)ntohl(tx
->req_list
[i
].addr_high
),
1918 (int)ntohl(tx
->req_list
[i
].addr_low
),
1919 (int)ntohs(tx
->req_list
[i
].length
),
1920 (int)ntohs(tx
->req_list
[i
].pseudo_hdr_offset
),
1921 tx
->req_list
[i
].cksum_offset
, tx
->req_list
[i
].flags
,
1922 tx
->req_list
[i
].rdma_count
);
1924 kprintf("--------------\n");
1926 info_last
= &tx
->info
[((cnt
- 1) + tx
->req
) & tx
->mask
];
1928 info_map
->map
= info_last
->map
;
1929 info_last
->map
= map
;
1932 mxge_submit_req(tx
, tx
->req_list
, cnt
);
1934 if (tx
->send_go
!= NULL
&& tx
->queue_active
== 0) {
1935 /* Tell the NIC to start polling this slice */
1937 tx
->queue_active
= 1;
1949 mxge_start(struct ifnet
*ifp
, struct ifaltq_subque
*ifsq
)
1951 mxge_softc_t
*sc
= ifp
->if_softc
;
1952 mxge_tx_ring_t
*tx
= ifsq_get_priv(ifsq
);
1956 KKASSERT(tx
->ifsq
== ifsq
);
1957 ASSERT_SERIALIZED(&tx
->tx_serialize
);
1959 if ((ifp
->if_flags
& IFF_RUNNING
) == 0 || ifsq_is_oactive(ifsq
))
1962 zeropad
= sc
->zeropad_dma
.dmem_busaddr
;
1963 while (tx
->mask
- (tx
->req
- tx
->done
) > tx
->max_desc
) {
1967 m
= ifsq_dequeue(ifsq
);
1972 error
= mxge_encap(tx
, m
, zeropad
);
1976 IFNET_STAT_INC(ifp
, oerrors
, 1);
1979 /* Ran out of transmit slots */
1980 ifsq_set_oactive(ifsq
);
1983 tx
->watchdog
.wd_timer
= 5;
1987 mxge_watchdog(struct ifaltq_subque
*ifsq
)
1989 struct ifnet
*ifp
= ifsq_get_ifp(ifsq
);
1990 struct mxge_softc
*sc
= ifp
->if_softc
;
1991 uint32_t rx_pause
= be32toh(sc
->ss
->fw_stats
->dropped_pause
);
1992 mxge_tx_ring_t
*tx
= ifsq_get_priv(ifsq
);
1994 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
1996 /* Check for pause blocking before resetting */
1997 if (tx
->watchdog_rx_pause
== rx_pause
) {
1998 mxge_warn_stuck(sc
, tx
, 0);
1999 mxge_watchdog_reset(sc
);
2002 if_printf(ifp
, "Flow control blocking xmits, "
2003 "check link partner\n");
2005 tx
->watchdog_rx_pause
= rx_pause
;
2009 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2010 * at most 32 bytes at a time, so as to avoid involving the software
2011 * pio handler in the nic. We re-write the first segment's low
2012 * DMA address to mark it valid only after we write the entire chunk
2015 static __inline
void
2016 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t
*dst
,
2017 mcp_kreq_ether_recv_t
*src
)
2021 low
= src
->addr_low
;
2022 src
->addr_low
= 0xffffffff;
2023 mxge_pio_copy(dst
, src
, 4 * sizeof (*src
));
2025 mxge_pio_copy(dst
+ 4, src
+ 4, 4 * sizeof (*src
));
2027 src
->addr_low
= low
;
2028 dst
->addr_low
= low
;
2033 mxge_get_buf_small(mxge_rx_ring_t
*rx
, bus_dmamap_t map
, int idx
,
2036 bus_dma_segment_t seg
;
2038 int cnt
, err
, mflag
;
2041 if (__predict_false(init
))
2044 m
= m_gethdr(mflag
, MT_DATA
);
2047 if (__predict_false(init
)) {
2049 * During initialization, there
2050 * is nothing to setup; bail out
2056 m
->m_len
= m
->m_pkthdr
.len
= MHLEN
;
2058 err
= bus_dmamap_load_mbuf_segment(rx
->dmat
, map
, m
,
2059 &seg
, 1, &cnt
, BUS_DMA_NOWAIT
);
2062 if (__predict_false(init
)) {
2064 * During initialization, there
2065 * is nothing to setup; bail out
2072 rx
->info
[idx
].m
= m
;
2073 rx
->shadow
[idx
].addr_low
= htobe32(MXGE_LOWPART_TO_U32(seg
.ds_addr
));
2074 rx
->shadow
[idx
].addr_high
= htobe32(MXGE_HIGHPART_TO_U32(seg
.ds_addr
));
2078 mxge_submit_8rx(&rx
->lanai
[idx
- 7], &rx
->shadow
[idx
- 7]);
2083 mxge_get_buf_big(mxge_rx_ring_t
*rx
, bus_dmamap_t map
, int idx
,
2086 bus_dma_segment_t seg
;
2088 int cnt
, err
, mflag
;
2091 if (__predict_false(init
))
2094 if (rx
->cl_size
== MCLBYTES
)
2095 m
= m_getcl(mflag
, MT_DATA
, M_PKTHDR
);
2097 m
= m_getjcl(mflag
, MT_DATA
, M_PKTHDR
, MJUMPAGESIZE
);
2100 if (__predict_false(init
)) {
2102 * During initialization, there
2103 * is nothing to setup; bail out
2109 m
->m_len
= m
->m_pkthdr
.len
= rx
->cl_size
;
2111 err
= bus_dmamap_load_mbuf_segment(rx
->dmat
, map
, m
,
2112 &seg
, 1, &cnt
, BUS_DMA_NOWAIT
);
2115 if (__predict_false(init
)) {
2117 * During initialization, there
2118 * is nothing to setup; bail out
2125 rx
->info
[idx
].m
= m
;
2126 rx
->shadow
[idx
].addr_low
= htobe32(MXGE_LOWPART_TO_U32(seg
.ds_addr
));
2127 rx
->shadow
[idx
].addr_high
= htobe32(MXGE_HIGHPART_TO_U32(seg
.ds_addr
));
2131 mxge_submit_8rx(&rx
->lanai
[idx
- 7], &rx
->shadow
[idx
- 7]);
2136 * Myri10GE hardware checksums are not valid if the sender
2137 * padded the frame with non-zero padding. This is because
2138 * the firmware just does a simple 16-bit 1s complement
2139 * checksum across the entire frame, excluding the first 14
2140 * bytes. It is best to simply to check the checksum and
2141 * tell the stack about it only if the checksum is good
2143 static __inline
uint16_t
2144 mxge_rx_csum(struct mbuf
*m
, int csum
)
2146 const struct ether_header
*eh
;
2147 const struct ip
*ip
;
2150 eh
= mtod(m
, const struct ether_header
*);
2152 /* Only deal with IPv4 TCP & UDP for now */
2153 if (__predict_false(eh
->ether_type
!= htons(ETHERTYPE_IP
)))
2156 ip
= (const struct ip
*)(eh
+ 1);
2157 if (__predict_false(ip
->ip_p
!= IPPROTO_TCP
&& ip
->ip_p
!= IPPROTO_UDP
))
2161 c
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
,
2162 htonl(ntohs(csum
) + ntohs(ip
->ip_len
) +
2163 - (ip
->ip_hl
<< 2) + ip
->ip_p
));
2172 mxge_vlan_tag_remove(struct mbuf
*m
, uint32_t *csum
)
2174 struct ether_vlan_header
*evl
;
2177 evl
= mtod(m
, struct ether_vlan_header
*);
2180 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2181 * what the firmware thought was the end of the ethernet
2185 /* Put checksum into host byte order */
2186 *csum
= ntohs(*csum
);
2188 partial
= ntohl(*(uint32_t *)(mtod(m
, char *) + ETHER_HDR_LEN
));
2190 *csum
+= ((*csum
) < ~partial
);
2191 *csum
= ((*csum
) >> 16) + ((*csum
) & 0xFFFF);
2192 *csum
= ((*csum
) >> 16) + ((*csum
) & 0xFFFF);
2195 * Restore checksum to network byte order;
2196 * later consumers expect this
2198 *csum
= htons(*csum
);
2201 m
->m_pkthdr
.ether_vlantag
= ntohs(evl
->evl_tag
);
2202 m
->m_flags
|= M_VLANTAG
;
2205 * Remove the 802.1q header by copying the Ethernet
2206 * addresses over it and adjusting the beginning of
2207 * the data in the mbuf. The encapsulated Ethernet
2208 * type field is already in place.
2210 bcopy((char *)evl
, (char *)evl
+ EVL_ENCAPLEN
,
2211 ETHER_HDR_LEN
- ETHER_TYPE_LEN
);
2212 m_adj(m
, EVL_ENCAPLEN
);
2216 static __inline
void
2217 mxge_rx_done_big(struct ifnet
*ifp
, mxge_rx_ring_t
*rx
,
2218 uint32_t len
, uint32_t csum
)
2221 const struct ether_header
*eh
;
2222 bus_dmamap_t old_map
;
2225 idx
= rx
->cnt
& rx
->mask
;
2228 /* Save a pointer to the received mbuf */
2229 m
= rx
->info
[idx
].m
;
2231 /* Try to replace the received mbuf */
2232 if (mxge_get_buf_big(rx
, rx
->extra_map
, idx
, FALSE
)) {
2233 /* Drop the frame -- the old mbuf is re-cycled */
2234 IFNET_STAT_INC(ifp
, ierrors
, 1);
2238 /* Unmap the received buffer */
2239 old_map
= rx
->info
[idx
].map
;
2240 bus_dmamap_sync(rx
->dmat
, old_map
, BUS_DMASYNC_POSTREAD
);
2241 bus_dmamap_unload(rx
->dmat
, old_map
);
2243 /* Swap the bus_dmamap_t's */
2244 rx
->info
[idx
].map
= rx
->extra_map
;
2245 rx
->extra_map
= old_map
;
2248 * mcp implicitly skips 1st 2 bytes so that packet is properly
2251 m
->m_data
+= MXGEFW_PAD
;
2253 m
->m_pkthdr
.rcvif
= ifp
;
2254 m
->m_len
= m
->m_pkthdr
.len
= len
;
2256 IFNET_STAT_INC(ifp
, ipackets
, 1);
2258 eh
= mtod(m
, const struct ether_header
*);
2259 if (eh
->ether_type
== htons(ETHERTYPE_VLAN
))
2260 mxge_vlan_tag_remove(m
, &csum
);
2262 /* If the checksum is valid, mark it in the mbuf header */
2263 if ((ifp
->if_capenable
& IFCAP_RXCSUM
) &&
2264 mxge_rx_csum(m
, csum
) == 0) {
2265 /* Tell the stack that the checksum is good */
2266 m
->m_pkthdr
.csum_data
= 0xffff;
2267 m
->m_pkthdr
.csum_flags
= CSUM_PSEUDO_HDR
|
2270 ifp
->if_input(ifp
, m
, NULL
, -1);
2273 static __inline
void
2274 mxge_rx_done_small(struct ifnet
*ifp
, mxge_rx_ring_t
*rx
,
2275 uint32_t len
, uint32_t csum
)
2277 const struct ether_header
*eh
;
2279 bus_dmamap_t old_map
;
2282 idx
= rx
->cnt
& rx
->mask
;
2285 /* Save a pointer to the received mbuf */
2286 m
= rx
->info
[idx
].m
;
2288 /* Try to replace the received mbuf */
2289 if (mxge_get_buf_small(rx
, rx
->extra_map
, idx
, FALSE
)) {
2290 /* Drop the frame -- the old mbuf is re-cycled */
2291 IFNET_STAT_INC(ifp
, ierrors
, 1);
2295 /* Unmap the received buffer */
2296 old_map
= rx
->info
[idx
].map
;
2297 bus_dmamap_sync(rx
->dmat
, old_map
, BUS_DMASYNC_POSTREAD
);
2298 bus_dmamap_unload(rx
->dmat
, old_map
);
2300 /* Swap the bus_dmamap_t's */
2301 rx
->info
[idx
].map
= rx
->extra_map
;
2302 rx
->extra_map
= old_map
;
2305 * mcp implicitly skips 1st 2 bytes so that packet is properly
2308 m
->m_data
+= MXGEFW_PAD
;
2310 m
->m_pkthdr
.rcvif
= ifp
;
2311 m
->m_len
= m
->m_pkthdr
.len
= len
;
2313 IFNET_STAT_INC(ifp
, ipackets
, 1);
2315 eh
= mtod(m
, const struct ether_header
*);
2316 if (eh
->ether_type
== htons(ETHERTYPE_VLAN
))
2317 mxge_vlan_tag_remove(m
, &csum
);
2319 /* If the checksum is valid, mark it in the mbuf header */
2320 if ((ifp
->if_capenable
& IFCAP_RXCSUM
) &&
2321 mxge_rx_csum(m
, csum
) == 0) {
2322 /* Tell the stack that the checksum is good */
2323 m
->m_pkthdr
.csum_data
= 0xffff;
2324 m
->m_pkthdr
.csum_flags
= CSUM_PSEUDO_HDR
|
2327 ifp
->if_input(ifp
, m
, NULL
, -1);
2330 static __inline
void
2331 mxge_clean_rx_done(struct ifnet
*ifp
, struct mxge_rx_data
*rx_data
, int cycle
)
2333 mxge_rx_done_t
*rx_done
= &rx_data
->rx_done
;
2335 while (rx_done
->entry
[rx_done
->idx
].length
!= 0 && cycle
!= 0) {
2336 uint16_t length
, checksum
;
2338 length
= ntohs(rx_done
->entry
[rx_done
->idx
].length
);
2339 rx_done
->entry
[rx_done
->idx
].length
= 0;
2341 checksum
= rx_done
->entry
[rx_done
->idx
].checksum
;
2343 if (length
<= MXGE_RX_SMALL_BUFLEN
) {
2344 mxge_rx_done_small(ifp
, &rx_data
->rx_small
,
2347 mxge_rx_done_big(ifp
, &rx_data
->rx_big
,
2352 rx_done
->idx
&= rx_done
->mask
;
2357 static __inline
void
2358 mxge_tx_done(struct ifnet
*ifp
, mxge_tx_ring_t
*tx
, uint32_t mcp_idx
)
2360 ASSERT_SERIALIZED(&tx
->tx_serialize
);
2362 while (tx
->pkt_done
!= mcp_idx
) {
2366 idx
= tx
->done
& tx
->mask
;
2369 m
= tx
->info
[idx
].m
;
2371 * mbuf and DMA map only attached to the first
2376 IFNET_STAT_INC(ifp
, opackets
, 1);
2377 tx
->info
[idx
].m
= NULL
;
2378 bus_dmamap_unload(tx
->dmat
, tx
->info
[idx
].map
);
2384 * If we have space, clear OACTIVE to tell the stack that
2385 * its OK to send packets
2387 if (tx
->req
- tx
->done
< (tx
->mask
+ 1) / 2) {
2388 ifsq_clr_oactive(tx
->ifsq
);
2389 if (tx
->req
== tx
->done
) {
2390 /* Reset watchdog */
2391 tx
->watchdog
.wd_timer
= 0;
2395 if (!ifsq_is_empty(tx
->ifsq
))
2396 ifsq_devstart(tx
->ifsq
);
2398 if (tx
->send_stop
!= NULL
&& tx
->req
== tx
->done
) {
2400 * Let the NIC stop polling this queue, since there
2401 * are no more transmits pending
2404 tx
->queue_active
= 0;
2410 static struct mxge_media_type mxge_xfp_media_types
[] = {
2411 {IFM_10G_CX4
, 0x7f, "10GBASE-CX4 (module)"},
2412 {IFM_10G_SR
, (1 << 7), "10GBASE-SR"},
2413 {IFM_10G_LR
, (1 << 6), "10GBASE-LR"},
2414 {IFM_NONE
, (1 << 5), "10GBASE-ER"},
2415 {IFM_10G_LRM
, (1 << 4), "10GBASE-LRM"},
2416 {IFM_NONE
, (1 << 3), "10GBASE-SW"},
2417 {IFM_NONE
, (1 << 2), "10GBASE-LW"},
2418 {IFM_NONE
, (1 << 1), "10GBASE-EW"},
2419 {IFM_NONE
, (1 << 0), "Reserved"}
2422 static struct mxge_media_type mxge_sfp_media_types
[] = {
2423 {IFM_10G_TWINAX
, 0, "10GBASE-Twinax"},
2424 {IFM_NONE
, (1 << 7), "Reserved"},
2425 {IFM_10G_LRM
, (1 << 6), "10GBASE-LRM"},
2426 {IFM_10G_LR
, (1 << 5), "10GBASE-LR"},
2427 {IFM_10G_SR
, (1 << 4), "10GBASE-SR"},
2428 {IFM_10G_TWINAX
,(1 << 0), "10GBASE-Twinax"}
2432 mxge_media_set(mxge_softc_t
*sc
, int media_type
)
2436 if (media_type
== IFM_NONE
)
2440 fc_opt
= IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
;
2442 ifmedia_add(&sc
->media
, MXGE_IFM
| media_type
, 0, NULL
);
2443 ifmedia_set(&sc
->media
, MXGE_IFM
| media_type
| fc_opt
);
2445 sc
->current_media
= media_type
;
2449 mxge_media_unset(mxge_softc_t
*sc
)
2451 ifmedia_removeall(&sc
->media
);
2452 sc
->current_media
= IFM_NONE
;
2456 mxge_media_init(mxge_softc_t
*sc
)
2461 mxge_media_unset(sc
);
2464 * Parse the product code to deterimine the interface type
2465 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2466 * after the 3rd dash in the driver's cached copy of the
2467 * EEPROM's product code string.
2469 ptr
= sc
->product_code_string
;
2471 if_printf(sc
->ifp
, "Missing product code\n");
2475 for (i
= 0; i
< 3; i
++, ptr
++) {
2476 ptr
= strchr(ptr
, '-');
2478 if_printf(sc
->ifp
, "only %d dashes in PC?!?\n", i
);
2482 if (*ptr
== 'C' || *(ptr
+1) == 'C') {
2484 sc
->connector
= MXGE_CX4
;
2485 mxge_media_set(sc
, IFM_10G_CX4
);
2486 } else if (*ptr
== 'Q') {
2487 /* -Q is Quad Ribbon Fiber */
2488 sc
->connector
= MXGE_QRF
;
2489 if_printf(sc
->ifp
, "Quad Ribbon Fiber Media\n");
2490 /* DragonFly has no media type for Quad ribbon fiber */
2491 } else if (*ptr
== 'R') {
2493 sc
->connector
= MXGE_XFP
;
2494 /* NOTE: ifmedia will be installed later */
2495 } else if (*ptr
== 'S' || *(ptr
+1) == 'S') {
2496 /* -S or -2S is SFP+ */
2497 sc
->connector
= MXGE_SFP
;
2498 /* NOTE: ifmedia will be installed later */
2500 sc
->connector
= MXGE_UNK
;
2501 if_printf(sc
->ifp
, "Unknown media type: %c\n", *ptr
);
2506 * Determine the media type for a NIC. Some XFPs will identify
2507 * themselves only when their link is up, so this is initiated via a
2508 * link up interrupt. However, this can potentially take up to
2509 * several milliseconds, so it is run via the watchdog routine, rather
2510 * than in the interrupt handler itself.
2513 mxge_media_probe(mxge_softc_t
*sc
)
2516 const char *cage_type
;
2517 struct mxge_media_type
*mxge_media_types
= NULL
;
2518 int i
, err
, ms
, mxge_media_type_entries
;
2521 sc
->need_media_probe
= 0;
2523 if (sc
->connector
== MXGE_XFP
) {
2525 mxge_media_types
= mxge_xfp_media_types
;
2526 mxge_media_type_entries
= NELEM(mxge_xfp_media_types
);
2527 byte
= MXGE_XFP_COMPLIANCE_BYTE
;
2529 } else if (sc
->connector
== MXGE_SFP
) {
2530 /* -S or -2S is SFP+ */
2531 mxge_media_types
= mxge_sfp_media_types
;
2532 mxge_media_type_entries
= NELEM(mxge_sfp_media_types
);
2536 /* nothing to do; media type cannot change */
2541 * At this point we know the NIC has an XFP cage, so now we
2542 * try to determine what is in the cage by using the
2543 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2544 * register. We read just one byte, which may take over
2548 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
2549 cmd
.data0
= 0; /* just fetch 1 byte, not all 256 */
2551 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_READ
, &cmd
);
2552 if (err
!= MXGEFW_CMD_OK
) {
2553 if (err
== MXGEFW_CMD_ERROR_I2C_FAILURE
)
2554 if_printf(sc
->ifp
, "failed to read XFP\n");
2555 else if (err
== MXGEFW_CMD_ERROR_I2C_ABSENT
)
2556 if_printf(sc
->ifp
, "Type R/S with no XFP!?!?\n");
2558 if_printf(sc
->ifp
, "I2C read failed, err: %d", err
);
2559 mxge_media_unset(sc
);
2563 /* Now we wait for the data to be cached */
2565 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_BYTE
, &cmd
);
2566 for (ms
= 0; err
== EBUSY
&& ms
< 50; ms
++) {
2569 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_BYTE
, &cmd
);
2571 if (err
!= MXGEFW_CMD_OK
) {
2572 if_printf(sc
->ifp
, "failed to read %s (%d, %dms)\n",
2573 cage_type
, err
, ms
);
2574 mxge_media_unset(sc
);
2578 if (cmd
.data0
== mxge_media_types
[0].bitmask
) {
2580 if_printf(sc
->ifp
, "%s:%s\n", cage_type
,
2581 mxge_media_types
[0].name
);
2583 if (sc
->current_media
!= mxge_media_types
[0].flag
) {
2584 mxge_media_unset(sc
);
2585 mxge_media_set(sc
, mxge_media_types
[0].flag
);
2589 for (i
= 1; i
< mxge_media_type_entries
; i
++) {
2590 if (cmd
.data0
& mxge_media_types
[i
].bitmask
) {
2592 if_printf(sc
->ifp
, "%s:%s\n", cage_type
,
2593 mxge_media_types
[i
].name
);
2596 if (sc
->current_media
!= mxge_media_types
[i
].flag
) {
2597 mxge_media_unset(sc
);
2598 mxge_media_set(sc
, mxge_media_types
[i
].flag
);
2603 mxge_media_unset(sc
);
2605 if_printf(sc
->ifp
, "%s media 0x%x unknown\n", cage_type
,
2611 mxge_intr_status(struct mxge_softc
*sc
, const mcp_irq_data_t
*stats
)
2613 if (sc
->link_state
!= stats
->link_up
) {
2614 sc
->link_state
= stats
->link_up
;
2615 if (sc
->link_state
) {
2616 sc
->ifp
->if_link_state
= LINK_STATE_UP
;
2617 if_link_state_change(sc
->ifp
);
2619 if_printf(sc
->ifp
, "link up\n");
2621 sc
->ifp
->if_link_state
= LINK_STATE_DOWN
;
2622 if_link_state_change(sc
->ifp
);
2624 if_printf(sc
->ifp
, "link down\n");
2626 sc
->need_media_probe
= 1;
2629 if (sc
->rdma_tags_available
!= be32toh(stats
->rdma_tags_available
)) {
2630 sc
->rdma_tags_available
= be32toh(stats
->rdma_tags_available
);
2631 if_printf(sc
->ifp
, "RDMA timed out! %d tags left\n",
2632 sc
->rdma_tags_available
);
2635 if (stats
->link_down
) {
2636 sc
->down_cnt
+= stats
->link_down
;
2638 sc
->ifp
->if_link_state
= LINK_STATE_DOWN
;
2639 if_link_state_change(sc
->ifp
);
2644 mxge_serialize_skipmain(struct mxge_softc
*sc
)
2646 lwkt_serialize_array_enter(sc
->serializes
, sc
->nserialize
, 1);
2650 mxge_deserialize_skipmain(struct mxge_softc
*sc
)
2652 lwkt_serialize_array_exit(sc
->serializes
, sc
->nserialize
, 1);
2656 mxge_legacy(void *arg
)
2658 struct mxge_slice_state
*ss
= arg
;
2659 mxge_softc_t
*sc
= ss
->sc
;
2660 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2661 mxge_tx_ring_t
*tx
= &ss
->tx
;
2662 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2663 uint32_t send_done_count
;
2666 ASSERT_SERIALIZED(&sc
->main_serialize
);
2668 /* Make sure the DMA has finished */
2671 valid
= stats
->valid
;
2673 /* Lower legacy IRQ */
2674 *sc
->irq_deassert
= 0;
2675 if (!mxge_deassert_wait
) {
2676 /* Don't wait for conf. that irq is low */
2680 mxge_serialize_skipmain(sc
);
2683 * Loop while waiting for legacy irq deassertion
2684 * XXX do we really want to loop?
2687 /* Check for transmit completes and receives */
2688 send_done_count
= be32toh(stats
->send_done_count
);
2689 while ((send_done_count
!= tx
->pkt_done
) ||
2690 (rx_done
->entry
[rx_done
->idx
].length
!= 0)) {
2691 if (send_done_count
!= tx
->pkt_done
) {
2692 mxge_tx_done(&sc
->arpcom
.ac_if
, tx
,
2693 (int)send_done_count
);
2695 mxge_clean_rx_done(&sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2696 send_done_count
= be32toh(stats
->send_done_count
);
2698 if (mxge_deassert_wait
)
2700 } while (*((volatile uint8_t *)&stats
->valid
));
2702 mxge_deserialize_skipmain(sc
);
2704 /* Fw link & error stats meaningful only on the first slice */
2705 if (__predict_false(stats
->stats_updated
))
2706 mxge_intr_status(sc
, stats
);
2708 /* Check to see if we have rx token to pass back */
2710 *ss
->irq_claim
= be32toh(3);
2711 *(ss
->irq_claim
+ 1) = be32toh(3);
2717 struct mxge_slice_state
*ss
= arg
;
2718 mxge_softc_t
*sc
= ss
->sc
;
2719 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2720 mxge_tx_ring_t
*tx
= &ss
->tx
;
2721 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2722 uint32_t send_done_count
;
2724 #ifndef IFPOLL_ENABLE
2725 const boolean_t polling
= FALSE
;
2727 boolean_t polling
= FALSE
;
2730 ASSERT_SERIALIZED(&sc
->main_serialize
);
2732 /* Make sure the DMA has finished */
2733 if (__predict_false(!stats
->valid
))
2736 valid
= stats
->valid
;
2739 #ifdef IFPOLL_ENABLE
2740 if (sc
->arpcom
.ac_if
.if_flags
& IFF_NPOLLING
)
2745 /* Check for receives */
2746 lwkt_serialize_enter(&ss
->rx_data
.rx_serialize
);
2747 if (rx_done
->entry
[rx_done
->idx
].length
!= 0)
2748 mxge_clean_rx_done(&sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2749 lwkt_serialize_exit(&ss
->rx_data
.rx_serialize
);
2753 * Check for transmit completes
2756 * Since pkt_done is only changed by mxge_tx_done(),
2757 * which is called only in interrupt handler, the
2758 * check w/o holding tx serializer is MPSAFE.
2760 send_done_count
= be32toh(stats
->send_done_count
);
2761 if (send_done_count
!= tx
->pkt_done
) {
2762 lwkt_serialize_enter(&tx
->tx_serialize
);
2763 mxge_tx_done(&sc
->arpcom
.ac_if
, tx
, (int)send_done_count
);
2764 lwkt_serialize_exit(&tx
->tx_serialize
);
2767 if (__predict_false(stats
->stats_updated
))
2768 mxge_intr_status(sc
, stats
);
2770 /* Check to see if we have rx token to pass back */
2771 if (!polling
&& (valid
& 0x1))
2772 *ss
->irq_claim
= be32toh(3);
2773 *(ss
->irq_claim
+ 1) = be32toh(3);
2777 mxge_msix_rx(void *arg
)
2779 struct mxge_slice_state
*ss
= arg
;
2780 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2782 #ifdef IFPOLL_ENABLE
2783 if (ss
->sc
->arpcom
.ac_if
.if_flags
& IFF_NPOLLING
)
2787 ASSERT_SERIALIZED(&ss
->rx_data
.rx_serialize
);
2789 if (rx_done
->entry
[rx_done
->idx
].length
!= 0)
2790 mxge_clean_rx_done(&ss
->sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2792 *ss
->irq_claim
= be32toh(3);
2796 mxge_msix_rxtx(void *arg
)
2798 struct mxge_slice_state
*ss
= arg
;
2799 mxge_softc_t
*sc
= ss
->sc
;
2800 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2801 mxge_tx_ring_t
*tx
= &ss
->tx
;
2802 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
2803 uint32_t send_done_count
;
2805 #ifndef IFPOLL_ENABLE
2806 const boolean_t polling
= FALSE
;
2808 boolean_t polling
= FALSE
;
2811 ASSERT_SERIALIZED(&ss
->rx_data
.rx_serialize
);
2813 /* Make sure the DMA has finished */
2814 if (__predict_false(!stats
->valid
))
2817 valid
= stats
->valid
;
2820 #ifdef IFPOLL_ENABLE
2821 if (sc
->arpcom
.ac_if
.if_flags
& IFF_NPOLLING
)
2825 /* Check for receives */
2826 if (!polling
&& rx_done
->entry
[rx_done
->idx
].length
!= 0)
2827 mxge_clean_rx_done(&sc
->arpcom
.ac_if
, &ss
->rx_data
, -1);
2830 * Check for transmit completes
2833 * Since pkt_done is only changed by mxge_tx_done(),
2834 * which is called only in interrupt handler, the
2835 * check w/o holding tx serializer is MPSAFE.
2837 send_done_count
= be32toh(stats
->send_done_count
);
2838 if (send_done_count
!= tx
->pkt_done
) {
2839 lwkt_serialize_enter(&tx
->tx_serialize
);
2840 mxge_tx_done(&sc
->arpcom
.ac_if
, tx
, (int)send_done_count
);
2841 lwkt_serialize_exit(&tx
->tx_serialize
);
2844 /* Check to see if we have rx token to pass back */
2845 if (!polling
&& (valid
& 0x1))
2846 *ss
->irq_claim
= be32toh(3);
2847 *(ss
->irq_claim
+ 1) = be32toh(3);
2851 mxge_init(void *arg
)
2853 struct mxge_softc
*sc
= arg
;
2855 ASSERT_IFNET_SERIALIZED_ALL(sc
->ifp
);
2856 if ((sc
->ifp
->if_flags
& IFF_RUNNING
) == 0)
2861 mxge_free_slice_mbufs(struct mxge_slice_state
*ss
)
2865 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
2866 if (ss
->rx_data
.rx_big
.info
[i
].m
== NULL
)
2868 bus_dmamap_unload(ss
->rx_data
.rx_big
.dmat
,
2869 ss
->rx_data
.rx_big
.info
[i
].map
);
2870 m_freem(ss
->rx_data
.rx_big
.info
[i
].m
);
2871 ss
->rx_data
.rx_big
.info
[i
].m
= NULL
;
2874 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
2875 if (ss
->rx_data
.rx_small
.info
[i
].m
== NULL
)
2877 bus_dmamap_unload(ss
->rx_data
.rx_small
.dmat
,
2878 ss
->rx_data
.rx_small
.info
[i
].map
);
2879 m_freem(ss
->rx_data
.rx_small
.info
[i
].m
);
2880 ss
->rx_data
.rx_small
.info
[i
].m
= NULL
;
2883 /* Transmit ring used only on the first slice */
2884 if (ss
->tx
.info
== NULL
)
2887 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
2888 if (ss
->tx
.info
[i
].m
== NULL
)
2890 bus_dmamap_unload(ss
->tx
.dmat
, ss
->tx
.info
[i
].map
);
2891 m_freem(ss
->tx
.info
[i
].m
);
2892 ss
->tx
.info
[i
].m
= NULL
;
2897 mxge_free_mbufs(mxge_softc_t
*sc
)
2901 for (slice
= 0; slice
< sc
->num_slices
; slice
++)
2902 mxge_free_slice_mbufs(&sc
->ss
[slice
]);
2906 mxge_free_slice_rings(struct mxge_slice_state
*ss
)
2910 if (ss
->rx_data
.rx_done
.entry
!= NULL
) {
2911 mxge_dma_free(&ss
->rx_done_dma
);
2912 ss
->rx_data
.rx_done
.entry
= NULL
;
2915 if (ss
->tx
.req_list
!= NULL
) {
2916 kfree(ss
->tx
.req_list
, M_DEVBUF
);
2917 ss
->tx
.req_list
= NULL
;
2920 if (ss
->tx
.seg_list
!= NULL
) {
2921 kfree(ss
->tx
.seg_list
, M_DEVBUF
);
2922 ss
->tx
.seg_list
= NULL
;
2925 if (ss
->rx_data
.rx_small
.shadow
!= NULL
) {
2926 kfree(ss
->rx_data
.rx_small
.shadow
, M_DEVBUF
);
2927 ss
->rx_data
.rx_small
.shadow
= NULL
;
2930 if (ss
->rx_data
.rx_big
.shadow
!= NULL
) {
2931 kfree(ss
->rx_data
.rx_big
.shadow
, M_DEVBUF
);
2932 ss
->rx_data
.rx_big
.shadow
= NULL
;
2935 if (ss
->tx
.info
!= NULL
) {
2936 if (ss
->tx
.dmat
!= NULL
) {
2937 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
2938 bus_dmamap_destroy(ss
->tx
.dmat
,
2939 ss
->tx
.info
[i
].map
);
2941 bus_dma_tag_destroy(ss
->tx
.dmat
);
2943 kfree(ss
->tx
.info
, M_DEVBUF
);
2947 if (ss
->rx_data
.rx_small
.info
!= NULL
) {
2948 if (ss
->rx_data
.rx_small
.dmat
!= NULL
) {
2949 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
2950 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
2951 ss
->rx_data
.rx_small
.info
[i
].map
);
2953 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
2954 ss
->rx_data
.rx_small
.extra_map
);
2955 bus_dma_tag_destroy(ss
->rx_data
.rx_small
.dmat
);
2957 kfree(ss
->rx_data
.rx_small
.info
, M_DEVBUF
);
2958 ss
->rx_data
.rx_small
.info
= NULL
;
2961 if (ss
->rx_data
.rx_big
.info
!= NULL
) {
2962 if (ss
->rx_data
.rx_big
.dmat
!= NULL
) {
2963 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
2964 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
2965 ss
->rx_data
.rx_big
.info
[i
].map
);
2967 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
2968 ss
->rx_data
.rx_big
.extra_map
);
2969 bus_dma_tag_destroy(ss
->rx_data
.rx_big
.dmat
);
2971 kfree(ss
->rx_data
.rx_big
.info
, M_DEVBUF
);
2972 ss
->rx_data
.rx_big
.info
= NULL
;
2977 mxge_free_rings(mxge_softc_t
*sc
)
2984 for (slice
= 0; slice
< sc
->num_slices
; slice
++)
2985 mxge_free_slice_rings(&sc
->ss
[slice
]);
2989 mxge_alloc_slice_rings(struct mxge_slice_state
*ss
, int rx_ring_entries
,
2990 int tx_ring_entries
)
2992 mxge_softc_t
*sc
= ss
->sc
;
2997 * Allocate per-slice receive resources
3000 ss
->rx_data
.rx_small
.mask
= ss
->rx_data
.rx_big
.mask
=
3001 rx_ring_entries
- 1;
3002 ss
->rx_data
.rx_done
.mask
= (2 * rx_ring_entries
) - 1;
3004 /* Allocate the rx shadow rings */
3005 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_small
.shadow
);
3006 ss
->rx_data
.rx_small
.shadow
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3008 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_big
.shadow
);
3009 ss
->rx_data
.rx_big
.shadow
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3011 /* Allocate the rx host info rings */
3012 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_small
.info
);
3013 ss
->rx_data
.rx_small
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3015 bytes
= rx_ring_entries
* sizeof(*ss
->rx_data
.rx_big
.info
);
3016 ss
->rx_data
.rx_big
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3018 /* Allocate the rx busdma resources */
3019 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3021 4096, /* boundary */
3022 BUS_SPACE_MAXADDR
, /* low */
3023 BUS_SPACE_MAXADDR
, /* high */
3024 NULL
, NULL
, /* filter */
3025 MHLEN
, /* maxsize */
3027 MHLEN
, /* maxsegsize */
3028 BUS_DMA_WAITOK
| BUS_DMA_ALLOCNOW
,
3030 &ss
->rx_data
.rx_small
.dmat
); /* tag */
3032 device_printf(sc
->dev
, "Err %d allocating rx_small dmat\n",
3037 err
= bus_dmamap_create(ss
->rx_data
.rx_small
.dmat
, BUS_DMA_WAITOK
,
3038 &ss
->rx_data
.rx_small
.extra_map
);
3040 device_printf(sc
->dev
, "Err %d extra rx_small dmamap\n", err
);
3041 bus_dma_tag_destroy(ss
->rx_data
.rx_small
.dmat
);
3042 ss
->rx_data
.rx_small
.dmat
= NULL
;
3045 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
3046 err
= bus_dmamap_create(ss
->rx_data
.rx_small
.dmat
,
3047 BUS_DMA_WAITOK
, &ss
->rx_data
.rx_small
.info
[i
].map
);
3051 device_printf(sc
->dev
, "Err %d rx_small dmamap\n", err
);
3053 for (j
= 0; j
< i
; ++j
) {
3054 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
3055 ss
->rx_data
.rx_small
.info
[j
].map
);
3057 bus_dmamap_destroy(ss
->rx_data
.rx_small
.dmat
,
3058 ss
->rx_data
.rx_small
.extra_map
);
3059 bus_dma_tag_destroy(ss
->rx_data
.rx_small
.dmat
);
3060 ss
->rx_data
.rx_small
.dmat
= NULL
;
3065 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3067 4096, /* boundary */
3068 BUS_SPACE_MAXADDR
, /* low */
3069 BUS_SPACE_MAXADDR
, /* high */
3070 NULL
, NULL
, /* filter */
3073 4096, /* maxsegsize*/
3074 BUS_DMA_WAITOK
| BUS_DMA_ALLOCNOW
,
3076 &ss
->rx_data
.rx_big
.dmat
); /* tag */
3078 device_printf(sc
->dev
, "Err %d allocating rx_big dmat\n",
3083 err
= bus_dmamap_create(ss
->rx_data
.rx_big
.dmat
, BUS_DMA_WAITOK
,
3084 &ss
->rx_data
.rx_big
.extra_map
);
3086 device_printf(sc
->dev
, "Err %d extra rx_big dmamap\n", err
);
3087 bus_dma_tag_destroy(ss
->rx_data
.rx_big
.dmat
);
3088 ss
->rx_data
.rx_big
.dmat
= NULL
;
3091 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
3092 err
= bus_dmamap_create(ss
->rx_data
.rx_big
.dmat
, BUS_DMA_WAITOK
,
3093 &ss
->rx_data
.rx_big
.info
[i
].map
);
3097 device_printf(sc
->dev
, "Err %d rx_big dmamap\n", err
);
3098 for (j
= 0; j
< i
; ++j
) {
3099 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
3100 ss
->rx_data
.rx_big
.info
[j
].map
);
3102 bus_dmamap_destroy(ss
->rx_data
.rx_big
.dmat
,
3103 ss
->rx_data
.rx_big
.extra_map
);
3104 bus_dma_tag_destroy(ss
->rx_data
.rx_big
.dmat
);
3105 ss
->rx_data
.rx_big
.dmat
= NULL
;
3111 * Now allocate TX resources
3114 ss
->tx
.mask
= tx_ring_entries
- 1;
3115 ss
->tx
.max_desc
= MIN(MXGE_MAX_SEND_DESC
, tx_ring_entries
/ 4);
3118 * Allocate the tx request copy block; MUST be at least 8 bytes
3121 bytes
= sizeof(*ss
->tx
.req_list
) * (ss
->tx
.max_desc
+ 4);
3122 ss
->tx
.req_list
= kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes
),
3123 M_DEVBUF
, M_WAITOK
);
3125 /* Allocate the tx busdma segment list */
3126 bytes
= sizeof(*ss
->tx
.seg_list
) * ss
->tx
.max_desc
;
3127 ss
->tx
.seg_list
= kmalloc(bytes
, M_DEVBUF
, M_WAITOK
);
3129 /* Allocate the tx host info ring */
3130 bytes
= tx_ring_entries
* sizeof(*ss
->tx
.info
);
3131 ss
->tx
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3133 /* Allocate the tx busdma resources */
3134 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3136 sc
->tx_boundary
, /* boundary */
3137 BUS_SPACE_MAXADDR
, /* low */
3138 BUS_SPACE_MAXADDR
, /* high */
3139 NULL
, NULL
, /* filter */
3141 sizeof(struct ether_vlan_header
),
3143 ss
->tx
.max_desc
- 2, /* num segs */
3144 sc
->tx_boundary
, /* maxsegsz */
3145 BUS_DMA_WAITOK
| BUS_DMA_ALLOCNOW
|
3146 BUS_DMA_ONEBPAGE
, /* flags */
3147 &ss
->tx
.dmat
); /* tag */
3149 device_printf(sc
->dev
, "Err %d allocating tx dmat\n", err
);
3154 * Now use these tags to setup DMA maps for each slot in the ring
3156 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
3157 err
= bus_dmamap_create(ss
->tx
.dmat
,
3158 BUS_DMA_WAITOK
| BUS_DMA_ONEBPAGE
, &ss
->tx
.info
[i
].map
);
3162 device_printf(sc
->dev
, "Err %d tx dmamap\n", err
);
3163 for (j
= 0; j
< i
; ++j
) {
3164 bus_dmamap_destroy(ss
->tx
.dmat
,
3165 ss
->tx
.info
[j
].map
);
3167 bus_dma_tag_destroy(ss
->tx
.dmat
);
3176 mxge_alloc_rings(mxge_softc_t
*sc
)
3180 int tx_ring_entries
, rx_ring_entries
;
3183 /* Get ring sizes */
3184 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_SEND_RING_SIZE
, &cmd
);
3186 device_printf(sc
->dev
, "Cannot determine tx ring sizes\n");
3189 tx_ring_size
= cmd
.data0
;
3191 tx_ring_entries
= tx_ring_size
/ sizeof(mcp_kreq_ether_send_t
);
3192 rx_ring_entries
= sc
->rx_intr_slots
/ 2;
3195 device_printf(sc
->dev
, "tx desc %d, rx desc %d\n",
3196 tx_ring_entries
, rx_ring_entries
);
3199 sc
->ifp
->if_nmbclusters
= rx_ring_entries
* sc
->num_slices
;
3200 sc
->ifp
->if_nmbjclusters
= sc
->ifp
->if_nmbclusters
;
3202 ifq_set_maxlen(&sc
->ifp
->if_snd
, tx_ring_entries
- 1);
3203 ifq_set_ready(&sc
->ifp
->if_snd
);
3204 ifq_set_subq_cnt(&sc
->ifp
->if_snd
, sc
->num_tx_rings
);
3206 if (sc
->num_tx_rings
> 1) {
3207 sc
->ifp
->if_mapsubq
= ifq_mapsubq_mask
;
3208 ifq_set_subq_mask(&sc
->ifp
->if_snd
, sc
->num_tx_rings
- 1);
3211 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3212 err
= mxge_alloc_slice_rings(&sc
->ss
[slice
],
3213 rx_ring_entries
, tx_ring_entries
);
3215 device_printf(sc
->dev
,
3216 "alloc %d slice rings failed\n", slice
);
3224 mxge_choose_params(int mtu
, int *cl_size
)
3226 int bufsize
= mtu
+ ETHER_HDR_LEN
+ EVL_ENCAPLEN
+ MXGEFW_PAD
;
3228 if (bufsize
< MCLBYTES
) {
3229 *cl_size
= MCLBYTES
;
3231 KASSERT(bufsize
< MJUMPAGESIZE
, ("invalid MTU %d", mtu
));
3232 *cl_size
= MJUMPAGESIZE
;
3237 mxge_slice_open(struct mxge_slice_state
*ss
, int cl_size
)
3242 slice
= ss
- ss
->sc
->ss
;
3245 * Get the lanai pointers to the send and receive rings
3249 bzero(&cmd
, sizeof(cmd
)); /* silence gcc warning */
3250 if (ss
->sc
->num_tx_rings
== 1) {
3253 err
= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_SEND_OFFSET
,
3255 ss
->tx
.lanai
= (volatile mcp_kreq_ether_send_t
*)
3256 (ss
->sc
->sram
+ cmd
.data0
);
3257 /* Leave send_go and send_stop as NULL */
3261 err
= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_SEND_OFFSET
, &cmd
);
3262 ss
->tx
.lanai
= (volatile mcp_kreq_ether_send_t
*)
3263 (ss
->sc
->sram
+ cmd
.data0
);
3264 ss
->tx
.send_go
= (volatile uint32_t *)
3265 (ss
->sc
->sram
+ MXGEFW_ETH_SEND_GO
+ 64 * slice
);
3266 ss
->tx
.send_stop
= (volatile uint32_t *)
3267 (ss
->sc
->sram
+ MXGEFW_ETH_SEND_STOP
+ 64 * slice
);
3271 err
|= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_SMALL_RX_OFFSET
, &cmd
);
3272 ss
->rx_data
.rx_small
.lanai
=
3273 (volatile mcp_kreq_ether_recv_t
*)(ss
->sc
->sram
+ cmd
.data0
);
3276 err
|= mxge_send_cmd(ss
->sc
, MXGEFW_CMD_GET_BIG_RX_OFFSET
, &cmd
);
3277 ss
->rx_data
.rx_big
.lanai
=
3278 (volatile mcp_kreq_ether_recv_t
*)(ss
->sc
->sram
+ cmd
.data0
);
3281 if_printf(ss
->sc
->ifp
,
3282 "failed to get ring sizes or locations\n");
3287 * Stock small receive ring
3289 for (i
= 0; i
<= ss
->rx_data
.rx_small
.mask
; i
++) {
3290 err
= mxge_get_buf_small(&ss
->rx_data
.rx_small
,
3291 ss
->rx_data
.rx_small
.info
[i
].map
, i
, TRUE
);
3293 if_printf(ss
->sc
->ifp
, "alloced %d/%d smalls\n", i
,
3294 ss
->rx_data
.rx_small
.mask
+ 1);
3300 * Stock big receive ring
3302 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
3303 ss
->rx_data
.rx_big
.shadow
[i
].addr_low
= 0xffffffff;
3304 ss
->rx_data
.rx_big
.shadow
[i
].addr_high
= 0xffffffff;
3307 ss
->rx_data
.rx_big
.cl_size
= cl_size
;
3309 for (i
= 0; i
<= ss
->rx_data
.rx_big
.mask
; i
++) {
3310 err
= mxge_get_buf_big(&ss
->rx_data
.rx_big
,
3311 ss
->rx_data
.rx_big
.info
[i
].map
, i
, TRUE
);
3313 if_printf(ss
->sc
->ifp
, "alloced %d/%d bigs\n", i
,
3314 ss
->rx_data
.rx_big
.mask
+ 1);
3322 mxge_open(mxge_softc_t
*sc
)
3324 struct ifnet
*ifp
= sc
->ifp
;
3326 int err
, slice
, cl_size
, i
;
3328 volatile uint8_t *itable
;
3329 struct mxge_slice_state
*ss
;
3331 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
3333 /* Copy the MAC address in case it was overridden */
3334 bcopy(IF_LLADDR(ifp
), sc
->mac_addr
, ETHER_ADDR_LEN
);
3336 err
= mxge_reset(sc
, 1);
3338 if_printf(ifp
, "failed to reset\n");
3342 if (sc
->num_slices
> 1) {
3343 /* Setup the indirection table */
3344 cmd
.data0
= sc
->num_slices
;
3345 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_RSS_TABLE_SIZE
, &cmd
);
3347 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RSS_TABLE_OFFSET
, &cmd
);
3349 if_printf(ifp
, "failed to setup rss tables\n");
3353 /* Just enable an identity mapping */
3354 itable
= sc
->sram
+ cmd
.data0
;
3355 for (i
= 0; i
< sc
->num_slices
; i
++)
3356 itable
[i
] = (uint8_t)i
;
3359 volatile uint8_t *hwkey
;
3360 uint8_t swkey
[MXGE_HWRSS_KEYLEN
];
3362 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RSS_KEY_OFFSET
,
3365 if_printf(ifp
, "failed to get rsskey\n");
3368 hwkey
= sc
->sram
+ cmd
.data0
;
3370 toeplitz_get_key(swkey
, MXGE_HWRSS_KEYLEN
);
3371 for (i
= 0; i
< MXGE_HWRSS_KEYLEN
; ++i
)
3372 hwkey
[i
] = swkey
[i
];
3375 err
= mxge_send_cmd(sc
, MXGEFW_CMD_RSS_KEY_UPDATED
,
3378 if_printf(ifp
, "failed to update rsskey\n");
3382 if_printf(ifp
, "RSS key updated\n");
3388 if_printf(ifp
, "input hash: RSS\n");
3389 cmd
.data1
= MXGEFW_RSS_HASH_TYPE_IPV4
|
3390 MXGEFW_RSS_HASH_TYPE_TCP_IPV4
;
3393 if_printf(ifp
, "input hash: SRC_DST_PORT\n");
3394 cmd
.data1
= MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT
;
3396 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_RSS_ENABLE
, &cmd
);
3398 if_printf(ifp
, "failed to enable slices\n");
3403 cmd
.data0
= MXGEFW_TSO_MODE_NDIS
;
3404 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_TSO_MODE
, &cmd
);
3407 * Can't change TSO mode to NDIS, never allow TSO then
3409 if_printf(ifp
, "failed to set TSO mode\n");
3410 ifp
->if_capenable
&= ~IFCAP_TSO
;
3411 ifp
->if_capabilities
&= ~IFCAP_TSO
;
3412 ifp
->if_hwassist
&= ~CSUM_TSO
;
3415 mxge_choose_params(ifp
->if_mtu
, &cl_size
);
3418 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS
, &cmd
);
3420 * Error is only meaningful if we're trying to set
3421 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1
3425 * Give the firmware the mtu and the big and small buffer
3426 * sizes. The firmware wants the big buf size to be a power
3427 * of two. Luckily, DragonFly's clusters are powers of two
3429 cmd
.data0
= ifp
->if_mtu
+ ETHER_HDR_LEN
+ EVL_ENCAPLEN
;
3430 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_MTU
, &cmd
);
3432 cmd
.data0
= MXGE_RX_SMALL_BUFLEN
;
3433 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE
, &cmd
);
3435 cmd
.data0
= cl_size
;
3436 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_BIG_BUFFER_SIZE
, &cmd
);
3439 if_printf(ifp
, "failed to setup params\n");
3443 /* Now give him the pointer to the stats block */
3444 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3445 ss
= &sc
->ss
[slice
];
3446 cmd
.data0
= MXGE_LOWPART_TO_U32(ss
->fw_stats_dma
.dmem_busaddr
);
3447 cmd
.data1
= MXGE_HIGHPART_TO_U32(ss
->fw_stats_dma
.dmem_busaddr
);
3448 cmd
.data2
= sizeof(struct mcp_irq_data
);
3449 cmd
.data2
|= (slice
<< 16);
3450 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_STATS_DMA_V2
, &cmd
);
3454 bus
= sc
->ss
->fw_stats_dma
.dmem_busaddr
;
3455 bus
+= offsetof(struct mcp_irq_data
, send_done_count
);
3456 cmd
.data0
= MXGE_LOWPART_TO_U32(bus
);
3457 cmd
.data1
= MXGE_HIGHPART_TO_U32(bus
);
3458 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE
,
3461 /* Firmware cannot support multicast without STATS_DMA_V2 */
3462 sc
->fw_multicast_support
= 0;
3464 sc
->fw_multicast_support
= 1;
3468 if_printf(ifp
, "failed to setup params\n");
3472 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3473 err
= mxge_slice_open(&sc
->ss
[slice
], cl_size
);
3475 if_printf(ifp
, "couldn't open slice %d\n", slice
);
3480 /* Finally, start the firmware running */
3481 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ETHERNET_UP
, &cmd
);
3483 if_printf(ifp
, "Couldn't bring up link\n");
3487 ifp
->if_flags
|= IFF_RUNNING
;
3488 for (i
= 0; i
< sc
->num_tx_rings
; ++i
) {
3489 mxge_tx_ring_t
*tx
= &sc
->ss
[i
].tx
;
3491 ifsq_clr_oactive(tx
->ifsq
);
3492 ifsq_watchdog_start(&tx
->watchdog
);
3498 mxge_free_mbufs(sc
);
3503 mxge_close(mxge_softc_t
*sc
, int down
)
3505 struct ifnet
*ifp
= sc
->ifp
;
3507 int err
, old_down_cnt
, i
;
3509 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
3512 old_down_cnt
= sc
->down_cnt
;
3515 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ETHERNET_DOWN
, &cmd
);
3517 if_printf(ifp
, "Couldn't bring down link\n");
3519 if (old_down_cnt
== sc
->down_cnt
) {
3524 ifnet_deserialize_all(ifp
);
3525 DELAY(10 * sc
->intr_coal_delay
);
3526 ifnet_serialize_all(ifp
);
3530 if (old_down_cnt
== sc
->down_cnt
)
3531 if_printf(ifp
, "never got down irq\n");
3533 mxge_free_mbufs(sc
);
3535 ifp
->if_flags
&= ~IFF_RUNNING
;
3536 for (i
= 0; i
< sc
->num_tx_rings
; ++i
) {
3537 mxge_tx_ring_t
*tx
= &sc
->ss
[i
].tx
;
3539 ifsq_clr_oactive(tx
->ifsq
);
3540 ifsq_watchdog_stop(&tx
->watchdog
);
3545 mxge_setup_cfg_space(mxge_softc_t
*sc
)
3547 device_t dev
= sc
->dev
;
3549 uint16_t lnk
, pectl
;
3551 /* Find the PCIe link width and set max read request to 4KB */
3552 if (pci_find_extcap(dev
, PCIY_EXPRESS
, ®
) == 0) {
3553 lnk
= pci_read_config(dev
, reg
+ 0x12, 2);
3554 sc
->link_width
= (lnk
>> 4) & 0x3f;
3556 if (sc
->pectl
== 0) {
3557 pectl
= pci_read_config(dev
, reg
+ 0x8, 2);
3558 pectl
= (pectl
& ~0x7000) | (5 << 12);
3559 pci_write_config(dev
, reg
+ 0x8, pectl
, 2);
3562 /* Restore saved pectl after watchdog reset */
3563 pci_write_config(dev
, reg
+ 0x8, sc
->pectl
, 2);
3567 /* Enable DMA and memory space access */
3568 pci_enable_busmaster(dev
);
3572 mxge_read_reboot(mxge_softc_t
*sc
)
3574 device_t dev
= sc
->dev
;
3577 /* Find the vendor specific offset */
3578 if (pci_find_extcap(dev
, PCIY_VENDOR
, &vs
) != 0) {
3579 if_printf(sc
->ifp
, "could not find vendor specific offset\n");
3580 return (uint32_t)-1;
3582 /* Enable read32 mode */
3583 pci_write_config(dev
, vs
+ 0x10, 0x3, 1);
3584 /* Tell NIC which register to read */
3585 pci_write_config(dev
, vs
+ 0x18, 0xfffffff0, 4);
3586 return pci_read_config(dev
, vs
+ 0x14, 4);
3590 mxge_watchdog_reset(mxge_softc_t
*sc
)
3592 struct pci_devinfo
*dinfo
;
3599 if_printf(sc
->ifp
, "Watchdog reset!\n");
3602 * Check to see if the NIC rebooted. If it did, then all of
3603 * PCI config space has been reset, and things like the
3604 * busmaster bit will be zero. If this is the case, then we
3605 * must restore PCI config space before the NIC can be used
3608 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3609 if (cmd
== 0xffff) {
3611 * Maybe the watchdog caught the NIC rebooting; wait
3612 * up to 100ms for it to finish. If it does not come
3613 * back, then give up
3616 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3618 if_printf(sc
->ifp
, "NIC disappeared!\n");
3620 if ((cmd
& PCIM_CMD_BUSMASTEREN
) == 0) {
3621 /* Print the reboot status */
3622 reboot
= mxge_read_reboot(sc
);
3623 if_printf(sc
->ifp
, "NIC rebooted, status = 0x%x\n", reboot
);
3625 running
= sc
->ifp
->if_flags
& IFF_RUNNING
;
3628 * Quiesce NIC so that TX routines will not try to
3629 * xmit after restoration of BAR
3632 /* Mark the link as down */
3633 if (sc
->link_state
) {
3634 sc
->ifp
->if_link_state
= LINK_STATE_DOWN
;
3635 if_link_state_change(sc
->ifp
);
3639 /* Restore PCI configuration space */
3640 dinfo
= device_get_ivars(sc
->dev
);
3641 pci_cfg_restore(sc
->dev
, dinfo
);
3643 /* And redo any changes we made to our config space */
3644 mxge_setup_cfg_space(sc
);
3647 err
= mxge_load_firmware(sc
, 0);
3649 if_printf(sc
->ifp
, "Unable to re-load f/w\n");
3650 if (running
&& !err
) {
3653 err
= mxge_open(sc
);
3655 for (i
= 0; i
< sc
->num_tx_rings
; ++i
)
3656 ifsq_devstart_sched(sc
->ss
[i
].tx
.ifsq
);
3658 sc
->watchdog_resets
++;
3660 if_printf(sc
->ifp
, "NIC did not reboot, not resetting\n");
3664 if_printf(sc
->ifp
, "watchdog reset failed\n");
3668 callout_reset(&sc
->co_hdl
, mxge_ticks
, mxge_tick
, sc
);
3673 mxge_warn_stuck(mxge_softc_t
*sc
, mxge_tx_ring_t
*tx
, int slice
)
3675 if_printf(sc
->ifp
, "slice %d struck? ring state:\n", slice
);
3676 if_printf(sc
->ifp
, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3677 tx
->req
, tx
->done
, tx
->queue_active
);
3678 if_printf(sc
->ifp
, "tx.activate=%d tx.deactivate=%d\n",
3679 tx
->activate
, tx
->deactivate
);
3680 if_printf(sc
->ifp
, "pkt_done=%d fw=%d\n",
3681 tx
->pkt_done
, be32toh(sc
->ss
->fw_stats
->send_done_count
));
3685 mxge_update_stats(mxge_softc_t
*sc
)
3687 u_long ipackets
, opackets
, pkts
;
3689 IFNET_STAT_GET(sc
->ifp
, ipackets
, ipackets
);
3690 IFNET_STAT_GET(sc
->ifp
, opackets
, opackets
);
3692 pkts
= ipackets
- sc
->ipackets
;
3693 pkts
+= opackets
- sc
->opackets
;
3695 sc
->ipackets
= ipackets
;
3696 sc
->opackets
= opackets
;
3702 mxge_tick(void *arg
)
3704 mxge_softc_t
*sc
= arg
;
3709 lwkt_serialize_enter(&sc
->main_serialize
);
3712 if (sc
->ifp
->if_flags
& IFF_RUNNING
) {
3713 /* Aggregate stats from different slices */
3714 pkts
= mxge_update_stats(sc
);
3715 if (sc
->need_media_probe
)
3716 mxge_media_probe(sc
);
3721 /* Ensure NIC did not suffer h/w fault while idle */
3722 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3723 if ((cmd
& PCIM_CMD_BUSMASTEREN
) == 0) {
3725 mxge_serialize_skipmain(sc
);
3726 mxge_watchdog_reset(sc
);
3727 mxge_deserialize_skipmain(sc
);
3731 /* Look less often if NIC is idle */
3736 callout_reset(&sc
->co_hdl
, ticks
, mxge_tick
, sc
);
3738 lwkt_serialize_exit(&sc
->main_serialize
);
3742 mxge_media_change(struct ifnet
*ifp
)
3744 mxge_softc_t
*sc
= ifp
->if_softc
;
3745 const struct ifmedia
*ifm
= &sc
->media
;
3748 if (IFM_OPTIONS(ifm
->ifm_media
) & (IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
)) {
3757 return mxge_change_pause(sc
, pause
);
3761 mxge_change_mtu(mxge_softc_t
*sc
, int mtu
)
3763 struct ifnet
*ifp
= sc
->ifp
;
3764 int real_mtu
, old_mtu
;
3767 real_mtu
= mtu
+ ETHER_HDR_LEN
+ EVL_ENCAPLEN
;
3768 if (mtu
> sc
->max_mtu
|| real_mtu
< 60)
3771 old_mtu
= ifp
->if_mtu
;
3773 if (ifp
->if_flags
& IFF_RUNNING
) {
3775 err
= mxge_open(sc
);
3777 ifp
->if_mtu
= old_mtu
;
3786 mxge_media_status(struct ifnet
*ifp
, struct ifmediareq
*ifmr
)
3788 mxge_softc_t
*sc
= ifp
->if_softc
;
3790 ifmr
->ifm_status
= IFM_AVALID
;
3791 ifmr
->ifm_active
= IFM_ETHER
;
3794 ifmr
->ifm_status
|= IFM_ACTIVE
;
3797 * Autoselect is not supported, so the current media
3798 * should be delivered.
3800 ifmr
->ifm_active
|= sc
->current_media
;
3801 if (sc
->current_media
!= IFM_NONE
) {
3802 ifmr
->ifm_active
|= MXGE_IFM
;
3804 ifmr
->ifm_active
|= IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
;
3809 mxge_ioctl(struct ifnet
*ifp
, u_long command
, caddr_t data
,
3810 struct ucred
*cr __unused
)
3812 mxge_softc_t
*sc
= ifp
->if_softc
;
3813 struct ifreq
*ifr
= (struct ifreq
*)data
;
3816 ASSERT_IFNET_SERIALIZED_ALL(ifp
);
3821 err
= mxge_change_mtu(sc
, ifr
->ifr_mtu
);
3828 if (ifp
->if_flags
& IFF_UP
) {
3829 if (!(ifp
->if_flags
& IFF_RUNNING
)) {
3830 err
= mxge_open(sc
);
3833 * Take care of PROMISC and ALLMULTI
3836 mxge_change_promisc(sc
,
3837 ifp
->if_flags
& IFF_PROMISC
);
3838 mxge_set_multicast_list(sc
);
3841 if (ifp
->if_flags
& IFF_RUNNING
)
3848 mxge_set_multicast_list(sc
);
3852 mask
= ifr
->ifr_reqcap
^ ifp
->if_capenable
;
3853 if (mask
& IFCAP_TXCSUM
) {
3854 ifp
->if_capenable
^= IFCAP_TXCSUM
;
3855 if (ifp
->if_capenable
& IFCAP_TXCSUM
)
3856 ifp
->if_hwassist
|= CSUM_TCP
| CSUM_UDP
;
3858 ifp
->if_hwassist
&= ~(CSUM_TCP
| CSUM_UDP
);
3860 if (mask
& IFCAP_TSO
) {
3861 ifp
->if_capenable
^= IFCAP_TSO
;
3862 if (ifp
->if_capenable
& IFCAP_TSO
)
3863 ifp
->if_hwassist
|= CSUM_TSO
;
3865 ifp
->if_hwassist
&= ~CSUM_TSO
;
3867 if (mask
& IFCAP_RXCSUM
)
3868 ifp
->if_capenable
^= IFCAP_RXCSUM
;
3869 if (mask
& IFCAP_VLAN_HWTAGGING
)
3870 ifp
->if_capenable
^= IFCAP_VLAN_HWTAGGING
;
3875 err
= ifmedia_ioctl(ifp
, (struct ifreq
*)data
,
3876 &sc
->media
, command
);
3880 err
= ether_ioctl(ifp
, command
, data
);
3887 mxge_fetch_tunables(mxge_softc_t
*sc
)
3891 sc
->intr_coal_delay
= mxge_intr_coal_delay
;
3892 if (sc
->intr_coal_delay
< 0 || sc
->intr_coal_delay
> (10 * 1000))
3893 sc
->intr_coal_delay
= MXGE_INTR_COAL_DELAY
;
3896 if (mxge_ticks
== 0)
3897 mxge_ticks
= hz
/ 2;
3899 ifm
= ifmedia_str2ethfc(mxge_flowctrl
);
3900 if (ifm
& (IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
))
3903 sc
->use_rss
= mxge_use_rss
;
3905 sc
->throttle
= mxge_throttle
;
3906 if (sc
->throttle
&& sc
->throttle
> MXGE_MAX_THROTTLE
)
3907 sc
->throttle
= MXGE_MAX_THROTTLE
;
3908 if (sc
->throttle
&& sc
->throttle
< MXGE_MIN_THROTTLE
)
3909 sc
->throttle
= MXGE_MIN_THROTTLE
;
3913 mxge_free_slices(mxge_softc_t
*sc
)
3915 struct mxge_slice_state
*ss
;
3921 for (i
= 0; i
< sc
->num_slices
; i
++) {
3923 if (ss
->fw_stats
!= NULL
) {
3924 mxge_dma_free(&ss
->fw_stats_dma
);
3925 ss
->fw_stats
= NULL
;
3927 if (ss
->rx_data
.rx_done
.entry
!= NULL
) {
3928 mxge_dma_free(&ss
->rx_done_dma
);
3929 ss
->rx_data
.rx_done
.entry
= NULL
;
3932 kfree(sc
->ss
, M_DEVBUF
);
/*
 * mxge_alloc_slices: query the firmware for the rx ring size, size the
 * rx interrupt queue from it, allocate the zeroed slice-state array and,
 * per slice, initialize the rx/tx serializers and allocate the DMA
 * backed rx "done" queue and firmware-stats block.
 * NOTE(review): error-return lines and braces are missing from this
 * extract; code tokens kept verbatim.
 */
3937 mxge_alloc_slices(mxge_softc_t
*sc
)
3940 struct mxge_slice_state
*ss
;
3942 int err
, i
, rx_ring_size
;
/* Ask firmware for the rx ring size; the printf below is the failure
 * path (the intervening error check line is not visible here). */
3944 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
3946 device_printf(sc
->dev
, "Cannot determine rx ring size\n");
/* Two interrupt-queue slots per rx descriptor. */
3949 rx_ring_size
= cmd
.data0
;
3950 sc
->rx_intr_slots
= 2 * (rx_ring_size
/ sizeof (mcp_dma_addr_t
));
/* Zeroed, cache-aligned slice-state array; M_WAITOK cannot fail. */
3952 bytes
= sizeof(*sc
->ss
) * sc
->num_slices
;
3953 sc
->ss
= kmalloc_cachealign(bytes
, M_DEVBUF
, M_WAITOK
| M_ZERO
);
3955 for (i
= 0; i
< sc
->num_slices
; i
++) {
3960 lwkt_serialize_init(&ss
->rx_data
.rx_serialize
);
3961 lwkt_serialize_init(&ss
->tx
.tx_serialize
);
3965 * Allocate per-slice rx interrupt queue
3966 * XXX assume 4bytes mcp_slot
3968 bytes
= sc
->rx_intr_slots
* sizeof(mcp_slot_t
);
/* 4096-byte aligned DMA block for the rx "done" queue. */
3969 err
= mxge_dma_alloc(sc
, &ss
->rx_done_dma
, bytes
, 4096);
3971 device_printf(sc
->dev
,
3972 "alloc %d slice rx_done failed\n", i
);
3975 ss
->rx_data
.rx_done
.entry
= ss
->rx_done_dma
.dmem_addr
;
3978 * Allocate the per-slice firmware stats
3980 bytes
= sizeof(*ss
->fw_stats
);
/* 64-byte aligned stats block shared with the firmware. */
3981 err
= mxge_dma_alloc(sc
, &ss
->fw_stats_dma
,
3982 sizeof(*ss
->fw_stats
), 64);
3984 device_printf(sc
->dev
,
3985 "alloc %d fw_stats failed\n", i
);
3988 ss
->fw_stats
= ss
->fw_stats_dma
.dmem_addr
;
/*
 * mxge_slice_probe: decide how many rx slices (and tx rings) to use.
 * Loads the RSS-capable firmware, interrogates it for limits, rounds
 * both the MSI-X vector count and the firmware slice limit down to
 * powers of 2, and falls back to a single slice (restoring the original
 * firmware) on any failure.  NOTE(review): early-return, brace and
 * assignment lines are missing from this extract; tokens kept verbatim.
 */
3994 mxge_slice_probe(mxge_softc_t
*sc
)
3996 int status
, max_intr_slots
, max_slices
, num_slices
;
3997 int msix_cnt
, msix_enable
, i
, multi_tx
;
3998 sc
4002 sc
->num_tx_rings
= 1;
/* Per-device "num_slices" env overrides the global tunable. */
4004 num_slices
= device_getenv_int(sc
->dev
, "num_slices", mxge_num_slices
);
4005 if (num_slices
== 1)
/* Multiple slices require MSI-X; honor the msix.enable env knob. */
4011 msix_enable
= device_getenv_int(sc
->dev
, "msix.enable",
4016 msix_cnt
= pci_msix_count(sc
->dev
);
4021 * Round down MSI-X vector count to the nearest power of 2
4024 while ((1 << (i
+ 1)) <= msix_cnt
)
4029 * Now load the slice aware firmware see what it supports
4031 old_fw
= sc
->fw_name
;
4032 if (old_fw
== mxge_fw_aligned
)
4033 sc
->fw_name
= mxge_fw_rss_aligned
;
4035 sc
->fw_name
= mxge_fw_rss_unaligned
;
4036 status
= mxge_load_firmware(sc
, 0);
4038 device_printf(sc
->dev
, "Falling back to a single slice\n");
4043 * Try to send a reset command to the card to see if it is alive
4045 memset(&cmd
, 0, sizeof(cmd
));
4046 status
= mxge_send_cmd(sc
, MXGEFW_CMD_RESET
, &cmd
);
4048 device_printf(sc
->dev
, "failed reset\n");
4053 * Get rx ring size to calculate rx interrupt queue size
4055 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
4057 device_printf(sc
->dev
, "Cannot determine rx ring size\n");
4060 max_intr_slots
= 2 * (cmd
.data0
/ sizeof(mcp_dma_addr_t
));
4063 * Tell it the size of the rx interrupt queue
4065 cmd
.data0
= max_intr_slots
* sizeof(struct mcp_slot
);
4066 status
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
4068 device_printf(sc
->dev
, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4073 * Ask the maximum number of slices it supports
4075 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
, &cmd
);
4077 device_printf(sc
->dev
,
4078 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4081 max_slices
= cmd
.data0
;
4084 * Round down max slices count to the nearest power of 2
4087 while ((1 << (i
+ 1)) <= max_slices
)
4089 max_slices
= 1 << i
;
/* Cannot have more slices than MSI-X vectors. */
4091 if (max_slices
> msix_cnt
)
4092 max_slices
= msix_cnt
;
/* Clamp the requested count via if_ring_count2 (power-of-2 helper). */
4094 sc
->num_slices
= num_slices
;
4095 sc
->num_slices
= if_ring_count2(sc
->num_slices
, max_slices
);
/* Multiple tx rings are optional; gated by the multi_tx env knob. */
4097 multi_tx
= device_getenv_int(sc
->dev
, "multi_tx", mxge_multi_tx
);
4099 sc
->num_tx_rings
= sc
->num_slices
;
4102 device_printf(sc
->dev
, "using %d slices, max %d\n",
4103 sc
->num_slices
, max_slices
);
4106 if (sc
->num_slices
== 1)
/* Failure path: restore the original (non-RSS) firmware. */
4111 sc
->fw_name
= old_fw
;
4112 mxge_load_firmware(sc
, 0);
/*
 * mxge_setup_serialize: build the flat array of serializers handed to
 * the ifnet_serialize_array_* helpers.  Order is main serializer first,
 * then every slice's rx serializer, then every slice's tx serializer --
 * the serialize/deserialize handlers rely on this exact order.
 * NOTE(review): the kmalloc assignment target and the `i = 0` init are
 * on lines missing from this extract; tokens kept verbatim.
 */
4116 mxge_setup_serialize(struct mxge_softc
*sc
)
4120 /* Main + rx + tx */
4121 sc
->nserialize
= (2 * sc
->num_slices
) + 1;
4123 kmalloc(sc
->nserialize
* sizeof(struct lwkt_serialize
*),
4124 M_DEVBUF
, M_WAITOK
| M_ZERO
);
4129 * NOTE: Order is critical
/* Slot 0: the main (device-wide) serializer. */
4132 KKASSERT(i
< sc
->nserialize
);
4133 sc
->serializes
[i
++] = &sc
->main_serialize
;
/* Next: one rx serializer per slice. */
4135 for (slice
= 0; slice
< sc
->num_slices
; ++slice
) {
4136 KKASSERT(i
< sc
->nserialize
);
4137 sc
->serializes
[i
++] = &sc
->ss
[slice
].rx_data
.rx_serialize
;
/* Last: one tx serializer per slice. */
4140 for (slice
= 0; slice
< sc
->num_slices
; ++slice
) {
4141 KKASSERT(i
< sc
->nserialize
);
4142 sc
->serializes
[i
++] = &sc
->ss
[slice
].tx
.tx_serialize
;
/* Every slot must be filled exactly once. */
4145 KKASSERT(i
== sc
->nserialize
);
4149 mxge_serialize(struct ifnet
*ifp
, enum ifnet_serialize slz
)
4151 struct mxge_softc
*sc
= ifp
->if_softc
;
4153 ifnet_serialize_array_enter(sc
->serializes
, sc
->nserialize
, slz
);
4157 mxge_deserialize(struct ifnet
*ifp
, enum ifnet_serialize slz
)
4159 struct mxge_softc
*sc
= ifp
->if_softc
;
4161 ifnet_serialize_array_exit(sc
->serializes
, sc
->nserialize
, slz
);
4165 mxge_tryserialize(struct ifnet
*ifp
, enum ifnet_serialize slz
)
4167 struct mxge_softc
*sc
= ifp
->if_softc
;
4169 return ifnet_serialize_array_try(sc
->serializes
, sc
->nserialize
, slz
);
/*
 * mxge_serialize_assert (INVARIANTS only): assert that the serializers
 * selected by @slz are (or are not, per @serialized) currently held.
 * NOTE(review): the continuation of the array_assert call (line 4181,
 * presumably "slz, serialized);") is missing from this extract.
 */
4175 mxge_serialize_assert(struct ifnet
*ifp
, enum ifnet_serialize slz
,
4176 boolean_t serialized
)
4178 struct mxge_softc
*sc
= ifp
->if_softc
;
4180 ifnet_serialize_array_assert(sc
->serializes
, sc
->nserialize
,
4184 #endif /* INVARIANTS */
4186 #ifdef IFPOLL_ENABLE
/*
 * mxge_npoll_rx: ifpoll rx handler for one slice.  Drains up to @cycle
 * entries from the slice's rx "done" queue when work is pending; when
 * idle it hands the rx token back to the NIC by writing irq_claim.
 * Must run with the slice rx serializer held (asserted below).
 * NOTE(review): the branch structure between the clean call and the
 * irq_claim write (lines 4198-4200, 4205) is not visible here.
 */
4189 mxge_npoll_rx(struct ifnet
*ifp
, void *xss
, int cycle
)
4191 struct mxge_slice_state
*ss
= xss
;
4192 mxge_rx_done_t
*rx_done
= &ss
->rx_data
.rx_done
;
4194 ASSERT_SERIALIZED(&ss
->rx_data
.rx_serialize
);
/* A non-zero length at the current index means packets are pending. */
4196 if (rx_done
->entry
[rx_done
->idx
].length
!= 0) {
4197 mxge_clean_rx_done(&ss
->sc
->arpcom
.ac_if
, &ss
->rx_data
, cycle
);
4201 * This register writting obviously has cost,
4202 * however, if we don't hand back the rx token,
4203 * the upcoming packets may suffer rediculously
4204 * large delay, as observed on 8AL-C using ping(8).
4206 *ss
->irq_claim
= be32toh(3);
/*
 * mxge_npoll: if_npoll registration handler.  When @info is non-NULL
 * (polling being enabled), register mxge_npoll_rx for each slice on the
 * cpu that slice's interrupt is bound to; only rx is polled.
 * NOTE(review): the deregistration (info == NULL) path and interrupt
 * enable/disable lines are missing from this extract.
 */
4211 mxge_npoll(struct ifnet
*ifp
, struct ifpoll_info
*info
)
4213 struct mxge_softc
*sc
= ifp
->if_softc
;
4220 * Only poll rx; polling tx and status don't seem to work
4222 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4223 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
/* Register on the slice's interrupt cpu so polling keeps affinity. */
4224 int idx
= ss
->intr_cpuid
;
4226 KKASSERT(idx
< ncpus2
);
4227 info
->ifpi_rx
[idx
].poll_func
= mxge_npoll_rx
;
4228 info
->ifpi_rx
[idx
].arg
= ss
;
4229 info
->ifpi_rx
[idx
].serializer
= &ss
->rx_data
.rx_serialize
;
4233 #endif /* IFPOLL_ENABLE */
/*
 * mxge_attach: device attach method.  Maps the board, parses the EEPROM
 * strings, allocates command/zeropad/dmabench DMA buffers, selects and
 * loads firmware, probes/allocates slices and interrupts, configures
 * the ifnet (capabilities, serializer hooks, per-subqueue tx state) and
 * starts the periodic tick callout.  NOTE(review): error checks and the
 * goto-cleanup lines between most steps are missing from this extract;
 * each device_printf below is the failure path of the call above it.
 */
4236 mxge_attach(device_t dev
)
4238 mxge_softc_t
*sc
= device_get_softc(dev
);
4239 struct ifnet
*ifp
= &sc
->arpcom
.ac_if
;
4243 * Avoid rewriting half the lines in this file to use
4244 * &sc->arpcom.ac_if instead
4248 if_initname(ifp
, device_get_name(dev
), device_get_unit(dev
));
4250 /* IFM_ETH_FORCEPAUSE can't be changed */
4251 ifmedia_init(&sc
->media
, IFM_ETH_RXPAUSE
| IFM_ETH_TXPAUSE
,
4252 mxge_media_change
, mxge_media_status
);
4254 lwkt_serialize_init(&sc
->main_serialize
);
4256 mxge_fetch_tunables(sc
);
/* Parent DMA tag all other tags derive from; 32-bit size limits. */
4258 err
= bus_dma_tag_create(NULL
, /* parent */
4261 BUS_SPACE_MAXADDR
, /* low */
4262 BUS_SPACE_MAXADDR
, /* high */
4263 NULL
, NULL
, /* filter */
4264 BUS_SPACE_MAXSIZE_32BIT
,/* maxsize */
4266 BUS_SPACE_MAXSIZE_32BIT
,/* maxsegsize */
4268 &sc
->parent_dmat
); /* tag */
4270 device_printf(dev
, "Err %d allocating parent dmat\n", err
);
4274 callout_init_mp(&sc
->co_hdl
);
4276 mxge_setup_cfg_space(sc
);
4279 * Map the board into the kernel
4282 sc
->mem_res
= bus_alloc_resource_any(dev
, SYS_RES_MEMORY
,
4284 if (sc
->mem_res
== NULL
) {
4285 device_printf(dev
, "could not map memory\n");
4290 sc
->sram
= rman_get_virtual(sc
->mem_res
);
/* SRAM size: 2MB minus firmware/scratch regions minus 0x100. */
4291 sc
->sram_size
= 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4292 if (sc
->sram_size
> rman_get_size(sc
->mem_res
)) {
4293 device_printf(dev
, "impossible memory region size %ld\n",
4294 rman_get_size(sc
->mem_res
));
4300 * Make NULL terminated copy of the EEPROM strings section of
/* EEPROM strings live at the end of SRAM; last 2 bytes left zero so
 * the copy is guaranteed NUL terminated. */
4303 bzero(sc
->eeprom_strings
, MXGE_EEPROM_STRINGS_SIZE
);
4304 bus_space_read_region_1(rman_get_bustag(sc
->mem_res
),
4305 rman_get_bushandle(sc
->mem_res
),
4306 sc
->sram_size
- MXGE_EEPROM_STRINGS_SIZE
,
4307 sc
->eeprom_strings
, MXGE_EEPROM_STRINGS_SIZE
- 2);
4308 err
= mxge_parse_strings(sc
);
4310 device_printf(dev
, "parse EEPROM string failed\n");
4315 * Enable write combining for efficient use of PCIe bus
4320 * Allocate the out of band DMA memory
/* 64-byte aligned command block shared with the firmware. */
4322 err
= mxge_dma_alloc(sc
, &sc
->cmd_dma
, sizeof(mxge_cmd_t
), 64);
4324 device_printf(dev
, "alloc cmd DMA buf failed\n");
4327 sc
->cmd
= sc
->cmd_dma
.dmem_addr
;
/* 64-byte zero pad block used to pad short tx frames. */
4329 err
= mxge_dma_alloc(sc
, &sc
->zeropad_dma
, 64, 64);
4331 device_printf(dev
, "alloc zeropad DMA buf failed\n");
/* 4KB scratch page for the DMA benchmark used at reset time. */
4335 err
= mxge_dma_alloc(sc
, &sc
->dmabench_dma
, 4096, 4096);
4337 device_printf(dev
, "alloc dmabench DMA buf failed\n");
4341 /* Select & load the firmware */
4342 err
= mxge_select_firmware(sc
);
4344 device_printf(dev
, "select firmware failed\n");
4348 mxge_slice_probe(sc
);
4349 err
= mxge_alloc_slices(sc
);
4351 device_printf(dev
, "alloc slices failed\n");
4355 err
= mxge_alloc_intr(sc
);
4357 device_printf(dev
, "alloc intr failed\n");
4361 /* Setup serializes */
4362 mxge_setup_serialize(sc
);
4364 err
= mxge_reset(sc
, 0);
4366 device_printf(dev
, "reset failed\n");
4370 err
= mxge_alloc_rings(sc
);
4372 device_printf(dev
, "failed to allocate rings\n");
/* Advertise checksum offload, TSO and VLAN capabilities. */
4376 ifp
->if_baudrate
= IF_Gbps(10UL);
4377 ifp
->if_capabilities
= IFCAP_RXCSUM
| IFCAP_TXCSUM
| IFCAP_TSO
;
4378 ifp
->if_hwassist
= CSUM_TCP
| CSUM_UDP
| CSUM_TSO
;
4380 ifp
->if_capabilities
|= IFCAP_VLAN_MTU
;
4382 /* Well, its software, sigh */
4383 ifp
->if_capabilities
|= IFCAP_VLAN_HWTAGGING
;
4385 ifp
->if_capenable
= ifp
->if_capabilities
;
4388 ifp
->if_flags
= IFF_BROADCAST
| IFF_SIMPLEX
| IFF_MULTICAST
;
4389 ifp
->if_init
= mxge_init
;
4390 ifp
->if_ioctl
= mxge_ioctl
;
4391 ifp
->if_start
= mxge_start
;
4392 #ifdef IFPOLL_ENABLE
/* ifpoll only with MSI/MSI-X; legacy interrupts are excluded. */
4393 if (sc
->intr_type
!= PCI_INTR_TYPE_LEGACY
)
4394 ifp
->if_npoll
= mxge_npoll
;
4396 ifp
->if_serialize
= mxge_serialize
;
4397 ifp
->if_deserialize
= mxge_deserialize
;
4398 ifp
->if_tryserialize
= mxge_tryserialize
;
4400 ifp
->if_serialize_assert
= mxge_serialize_assert
;
4403 /* Increase TSO burst length */
4404 ifp
->if_tsolen
= (32 * ETHERMTU
);
4406 /* Initialise the ifmedia structure */
4407 mxge_media_init(sc
);
4408 mxge_media_probe(sc
);
4410 ether_ifattach(ifp
, sc
->mac_addr
, NULL
);
4412 /* Setup TX rings and subqueues */
4413 for (i
= 0; i
< sc
->num_tx_rings
; ++i
) {
4414 struct ifaltq_subque
*ifsq
= ifq_get_subq(&ifp
->if_snd
, i
);
4415 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
/* Bind the subqueue to the slice's interrupt cpu and tx serializer. */
4417 ifsq_set_cpuid(ifsq
, ss
->intr_cpuid
);
4418 ifsq_set_hw_serialize(ifsq
, &ss
->tx
.tx_serialize
);
4419 ifsq_set_priv(ifsq
, &ss
->tx
);
4422 ifsq_watchdog_init(&ss
->tx
.watchdog
, ifsq
, mxge_watchdog
);
4427 * We are not ready to do "gather" jumbo frame, so
4428 * limit MTU to MJUMPAGESIZE
4430 sc
->max_mtu
= MJUMPAGESIZE
-
4431 ETHER_HDR_LEN
- EVL_ENCAPLEN
- MXGEFW_PAD
- 1;
/* Hook interrupt handlers; must detach the ifnet again on failure. */
4434 err
= mxge_setup_intr(sc
);
4436 device_printf(dev
, "alloc and setup intr failed\n");
4437 ether_ifdetach(ifp
);
4441 mxge_add_sysctls(sc
);
4443 /* Increase non-cluster mbuf limit; used by small RX rings */
4444 mb_inclimit(ifp
->if_nmbclusters
);
/* Start the periodic tick on slice 0's interrupt cpu. */
4446 callout_reset_bycpu(&sc
->co_hdl
, mxge_ticks
, mxge_tick
, sc
,
4447 sc
->ss
[0].intr_cpuid
);
/*
 * mxge_detach: device detach method.  Tears everything down in the
 * reverse order of mxge_attach: stop the interface and callout under
 * full serialization, detach interrupts and the ifnet, then free DMA
 * buffers, slices and bus resources.  NOTE(review): braces and a few
 * condition/cleanup lines are missing from this extract.
 */
4456 mxge_detach(device_t dev
)
4458 mxge_softc_t
*sc
= device_get_softc(dev
);
4460 if (device_is_attached(dev
)) {
4461 struct ifnet
*ifp
= sc
->ifp
;
/* Remember the limit we raised in attach so it can be undone below. */
4462 int mblimit
= ifp
->if_nmbclusters
;
4464 ifnet_serialize_all(ifp
);
4467 if (ifp
->if_flags
& IFF_RUNNING
)
4469 callout_stop(&sc
->co_hdl
);
4471 mxge_teardown_intr(sc
, sc
->num_slices
);
4473 ifnet_deserialize_all(ifp
);
4475 callout_terminate(&sc
->co_hdl
);
4477 ether_ifdetach(ifp
);
4479 /* Decrease non-cluster mbuf limit increased by us */
4480 mb_inclimit(-mblimit
);
4482 ifmedia_removeall(&sc
->media
);
/* Quiesce firmware DMA before freeing the buffers it may touch;
 * the third operand of this condition (line 4485) is not visible. */
4484 if (sc
->cmd
!= NULL
&& sc
->zeropad_dma
.dmem_addr
!= NULL
&&
4486 mxge_dummy_rdma(sc
, 0);
4489 mxge_rem_sysctls(sc
);
4490 mxge_free_rings(sc
);
4492 /* MUST after sysctls, intr and rings are freed */
4493 mxge_free_slices(sc
);
4495 if (sc
->dmabench_dma
.dmem_addr
!= NULL
)
4496 mxge_dma_free(&sc
->dmabench_dma
);
4497 if (sc
->zeropad_dma
.dmem_addr
!= NULL
)
4498 mxge_dma_free(&sc
->zeropad_dma
);
4499 if (sc
->cmd_dma
.dmem_addr
!= NULL
)
4500 mxge_dma_free(&sc
->cmd_dma
);
/* MSI-X table lives behind BAR 2. */
4502 if (sc
->msix_table_res
!= NULL
) {
4503 bus_release_resource(dev
, SYS_RES_MEMORY
, PCIR_BAR(2),
4504 sc
->msix_table_res
);
4506 if (sc
->mem_res
!= NULL
) {
4507 bus_release_resource(dev
, SYS_RES_MEMORY
, PCIR_BARS
,
4511 if (sc
->parent_dmat
!= NULL
)
4512 bus_dma_tag_destroy(sc
->parent_dmat
);
/*
 * mxge_shutdown: device_shutdown method.  Only the signature is visible
 * in this extract; the body (lines 4519+) is not shown.
 */
4518 mxge_shutdown(device_t dev
)
/*
 * mxge_free_msix: release per-slice MSI-X IRQ resources and vectors,
 * then tear down MSI-X on the device.  @setup presumably gates the
 * final pci_teardown_msix() to setups that got that far -- the
 * conditional around it (line ~4540) is not visible; TODO confirm.
 * Only used in multi-slice mode (asserted below).
 */
4524 mxge_free_msix(struct mxge_softc
*sc
, boolean_t setup
)
4528 KKASSERT(sc
->num_slices
> 1);
4530 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4531 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
4533 if (ss
->intr_res
!= NULL
) {
4534 bus_release_resource(sc
->dev
, SYS_RES_IRQ
,
4535 ss
->intr_rid
, ss
->intr_res
);
4537 if (ss
->intr_rid
>= 0)
4538 pci_release_msix_vector(sc
->dev
, ss
->intr_rid
);
4541 pci_teardown_msix(sc
->dev
);
/*
 * mxge_alloc_msix: allocate one MSI-X vector per slice.  Computes a cpu
 * offset (env-overridable via "msix.offset"), assigns slice 0 the
 * combined rx/tx handler on the main serializer and slices 1..n-1 the
 * rx (or rxtx, with multiple tx rings) handlers on their rx serializer,
 * then maps the MSI-X table BAR, sets up and allocates the vectors and
 * IRQ resources, and enables MSI-X.  Failure unwinds via
 * mxge_free_msix().  NOTE(review): error checks, goto lines and some
 * assignments are missing from this extract; tokens kept verbatim.
 */
4545 mxge_alloc_msix(struct mxge_softc
*sc
)
4547 struct mxge_slice_state
*ss
;
4548 int offset
, rid
, error
, i
;
4549 boolean_t setup
= FALSE
;
4551 KKASSERT(sc
->num_slices
> 1);
/* Pick a default cpu offset; spread devices when slices < ncpus2. */
4553 if (sc
->num_slices
== ncpus2
) {
4558 offset_def
= (sc
->num_slices
* device_get_unit(sc
->dev
)) %
4561 offset
= device_getenv_int(sc
->dev
, "msix.offset", offset_def
);
/* A valid offset is < ncpus2 and a multiple of the slice count. */
4562 if (offset
>= ncpus2
||
4563 offset
% sc
->num_slices
!= 0) {
4564 device_printf(sc
->dev
, "invalid msix.offset %d, "
4565 "use %d\n", offset
, offset_def
);
4566 offset
= offset_def
;
/* Slice 0: combined interrupt on the main serializer. */
4572 ss
->intr_serialize
= &sc
->main_serialize
;
4573 ss
->intr_func
= mxge_msi
;
4574 ksnprintf(ss
->intr_desc0
, sizeof(ss
->intr_desc0
),
4575 "%s comb", device_get_nameunit(sc
->dev
));
4576 ss
->intr_desc
= ss
->intr_desc0
;
4577 ss
->intr_cpuid
= offset
;
/* Slices 1..n-1: rx-only or rx+tx depending on the tx ring count. */
4579 for (i
= 1; i
< sc
->num_slices
; ++i
) {
4582 ss
->intr_serialize
= &ss
->rx_data
.rx_serialize
;
4583 if (sc
->num_tx_rings
== 1) {
4584 ss
->intr_func
= mxge_msix_rx
;
4585 ksnprintf(ss
->intr_desc0
, sizeof(ss
->intr_desc0
),
4586 "%s rx", device_get_nameunit(sc
->dev
));
4588 ss
->intr_func
= mxge_msix_rxtx
;
4589 ksnprintf(ss
->intr_desc0
, sizeof(ss
->intr_desc0
),
4590 "%s rxtx", device_get_nameunit(sc
->dev
));
4592 ss
->intr_desc
= ss
->intr_desc0
;
4593 ss
->intr_cpuid
= offset
+ i
;
/* Map the BAR holding the MSI-X table. */
4597 sc
->msix_table_res
= bus_alloc_resource_any(sc
->dev
, SYS_RES_MEMORY
,
4599 if (sc
->msix_table_res
== NULL
) {
4600 device_printf(sc
->dev
, "couldn't alloc MSI-X table res\n");
4604 error
= pci_setup_msix(sc
->dev
);
4606 device_printf(sc
->dev
, "could not setup MSI-X\n");
/* Allocate one vector + IRQ resource per slice, on its target cpu. */
4611 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4614 error
= pci_alloc_msix_vector(sc
->dev
, i
, &ss
->intr_rid
,
4617 device_printf(sc
->dev
, "could not alloc "
4618 "MSI-X %d on cpu%d\n", i
, ss
->intr_cpuid
);
4622 ss
->intr_res
= bus_alloc_resource_any(sc
->dev
, SYS_RES_IRQ
,
4623 &ss
->intr_rid
, RF_ACTIVE
);
4624 if (ss
->intr_res
== NULL
) {
4625 device_printf(sc
->dev
, "could not alloc "
4626 "MSI-X %d resource\n", i
);
4632 pci_enable_msix(sc
->dev
);
4633 sc
->intr_type
= PCI_INTR_TYPE_MSIX
;
/* Failure path: unwind whatever was allocated so far. */
4636 mxge_free_msix(sc
, setup
);
/*
 * mxge_alloc_intr: allocate the device interrupt(s).  Multi-slice
 * configurations use MSI-X via mxge_alloc_msix(); otherwise a single
 * legacy or MSI interrupt is allocated for slice 0 and dispatched on
 * the main serializer.  NOTE(review): braces and early-return lines
 * are missing from this extract; tokens kept verbatim.
 */
4641 mxge_alloc_intr(struct mxge_softc
*sc
)
4643 struct mxge_slice_state
*ss
;
4646 if (sc
->num_slices
> 1) {
4649 error
= mxge_alloc_msix(sc
);
4652 KKASSERT(sc
->intr_type
== PCI_INTR_TYPE_MSIX
);
/* Single-slice path: one legacy/MSI interrupt, honoring the
 * mxge_msi_enable tunable. */
4658 sc
->intr_type
= pci_alloc_1intr(sc
->dev
, mxge_msi_enable
,
4659 &ss
->intr_rid
, &irq_flags
);
4661 ss
->intr_res
= bus_alloc_resource_any(sc
->dev
, SYS_RES_IRQ
,
4662 &ss
->intr_rid
, irq_flags
);
4663 if (ss
->intr_res
== NULL
) {
4664 device_printf(sc
->dev
, "could not alloc interrupt\n");
/* Legacy interrupts need the deassert-checking handler. */
4668 if (sc
->intr_type
== PCI_INTR_TYPE_LEGACY
)
4669 ss
->intr_func
= mxge_legacy
;
4671 ss
->intr_func
= mxge_msi
;
4672 ss
->intr_serialize
= &sc
->main_serialize
;
4673 ss
->intr_cpuid
= rman_get_cpuid(ss
->intr_res
);
/*
 * mxge_setup_intr: hook each slice's interrupt handler to its IRQ
 * resource with the serializer and description chosen during
 * allocation.  On failure, tears down the handlers already installed
 * (slices 0..i-1) via mxge_teardown_intr(sc, i).
 */
4679 mxge_setup_intr(struct mxge_softc
*sc
)
4683 for (i
= 0; i
< sc
->num_slices
; ++i
) {
4684 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
/* MPSAFE handler dispatched under the slice's chosen serializer. */
4687 error
= bus_setup_intr_descr(sc
->dev
, ss
->intr_res
,
4688 INTR_MPSAFE
, ss
->intr_func
, ss
, &ss
->intr_hand
,
4689 ss
->intr_serialize
, ss
->intr_desc
);
4691 device_printf(sc
->dev
, "can't setup %dth intr\n", i
);
4692 mxge_teardown_intr(sc
, i
);
/*
 * mxge_teardown_intr: disconnect the interrupt handlers of the first
 * @cnt slices (the partial count lets mxge_setup_intr unwind a failed
 * setup).  NOTE(review): lines 4701-4706 are missing from this extract
 * and may contain an early-out guard -- TODO confirm.
 */
4700 mxge_teardown_intr(struct mxge_softc
*sc
, int cnt
)
4707 for (i
= 0; i
< cnt
; ++i
) {
4708 struct mxge_slice_state
*ss
= &sc
->ss
[i
];
4710 bus_teardown_intr(sc
->dev
, ss
->intr_res
, ss
->intr_hand
);
4715 mxge_free_intr(struct mxge_softc
*sc
)
4720 if (sc
->intr_type
!= PCI_INTR_TYPE_MSIX
) {
4721 struct mxge_slice_state
*ss
= &sc
->ss
[0];
4723 if (ss
->intr_res
!= NULL
) {
4724 bus_release_resource(sc
->dev
, SYS_RES_IRQ
,
4725 ss
->intr_rid
, ss
->intr_res
);
4727 if (sc
->intr_type
== PCI_INTR_TYPE_MSI
)
4728 pci_release_msi(sc
->dev
);
4730 mxge_free_msix(sc
, TRUE
);