i386 removal, part 61/x: Adjust some further #ifdefs.
[dragonfly.git] / sys / dev / netif / mxge / if_mxge.c
blob 419cdfdfb3cce33d3007923590c39c18c3bdcf8a
/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

$FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $

***************************************************************************/

#include "opt_ifpoll.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_poll.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>
#include <net/toeplitz.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__x86_64__)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
#include <dev/netif/mxge/if_mxge_var.h>

#define MXGE_IFM	(IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE)

#define MXGE_RX_SMALL_BUFLEN	(MHLEN - MXGEFW_PAD)
#define MXGE_HWRSS_KEYLEN	16
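
/*
 * A small receive buffer is an ordinary mbuf: mxge_get_buf_small() posts
 * MHLEN-sized mbufs, so the usable frame length is MHLEN less the
 * MXGEFW_PAD bytes of firmware-inserted padding (presumably there to keep
 * the IP header aligned).
 */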

/* Tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY;
static int mxge_deassert_wait = 1;
static int mxge_ticks;
static int mxge_num_slices = 0;
static int mxge_always_promisc = 0;
static int mxge_throttle = 0;
static int mxge_msi_enable = 1;
static int mxge_msix_enable = 1;
static int mxge_multi_tx = 1;
/*
 * Don't use RSS by default, it's just too slow
 */
static int mxge_use_rss = 0;

static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_FULL;

static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
static const char *mxge_fw_aligned = "mxge_eth_z8e";
static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices);
TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx);
TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss);
TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);
TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable);
TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl));
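
/*
 * All of the above are boot-time tunables, settable from the loader
 * environment (e.g. /boot/loader.conf).  Illustrative, not prescriptive,
 * examples:
 *
 *	hw.mxge.intr_coal_delay=30
 *	hw.mxge.multi_tx=0
 */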

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);

static int mxge_alloc_intr(struct mxge_softc *sc);
static void mxge_free_intr(struct mxge_softc *sc);
static int mxge_setup_intr(struct mxge_softc *sc);
static void mxge_teardown_intr(struct mxge_softc *sc, int cnt);

static device_method_t mxge_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	DEVMETHOD_END
};

static driver_t mxge_driver = {
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus. */
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);
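
/*
 * firmware(9) provides firmware_get()/firmware_put(), used below to fetch
 * the MCP images by name; zlib is required because those images are stored
 * deflate-compressed (see mxge_load_firmware_helper()).
 */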

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static void mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static void mxge_watchdog_reset(mxge_softc_t *sc);
static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);

static int
mxge_probe(device_t dev)
{
	if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
	    (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
	     pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
		int rev = pci_get_revid(dev);

		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__x86_64__)
	vm_offset_t len;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	pmap_change_attr((vm_offset_t)sc->sram, len / PAGE_SIZE,
	    PAT_WRITE_COMBINING);
#endif
}

static int
mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes,
    bus_size_t alignment)
{
	bus_size_t boundary;
	int err;

	if (bytes > 4096 && alignment == 4096)
		boundary = 0;
	else
		boundary = 4096;

	err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO, dma);
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err);
		return err;
	}
	return 0;
}

static void
mxge_dma_free(bus_dmamem_t *dma)
{
	bus_dmamap_unload(dma->dmem_tag, dma->dmem_map);
	bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map);
	bus_dma_tag_destroy(dma->dmem_tag);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
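/*
 * Illustrative (made-up) example of such a string blob:
 *
 *	"MAC=00:60:dd:47:ab:cd\0SN=123456\0PC=SAMPLE-CODE\0\0"
 *
 * Note that an SN2= string, when present, overrides SN= below.
 */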
static int
mxge_parse_strings(mxge_softc_t *sc)
{
	const char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
			    sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");
	return ENXIO;
}

#if defined(__x86_64__)

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/*
	 * XXXX
	 * Test below is commented because it is believed that doing
	 * config read/write beyond 0xff will access the config space
	 * for the next larger function. Uncomment this and remove
	 * the hacky pmap_mapdev() way of accessing config space when
	 * DragonFly grows support for extended pcie config space access.
	 */
#if 0
	/*
	 * See if we can, by some miracle, access the extended
	 * config space
	 */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/*
	 * Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_DEVICE, &idev);

	off = base + 0x00100000UL * (unsigned long)bus +
	    0x00001000UL * (unsigned long)(func + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (!(vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
		    vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (bootverbose) {
		device_printf(sc->dev, "Enabled ECRC on upstream "
		    "Nvidia bridge at %d:%d:%d\n",
		    (int)bus, (int)slot, (int)func);
	}
}

#else	/* __x86_64__ */

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
}

#endif

static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
	int status;
	uint32_t len;
	const char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
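
	/*
	 * Decoding the result: bytes moved = (cmd.data0 >> 16) * len and
	 * elapsed time = (cmd.data0 & 0xffff) * 0.5us, so the bandwidth
	 * in MB/s is transfers * len / (ticks * 0.5us), i.e. the
	 * "(transfers * len * 2) / ticks" expressions used below.
	 */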
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
		    test, status);
	}
	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;

	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
			    pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * Load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0)
		return status;

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.  Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS) {
		device_printf(dev, "Falling back to ethp! "
		    "Please install up to date fw\n");
	}
	return status;
}

static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (bootverbose) {
			device_printf(sc->dev,
			    "Assuming %s completions (forced)\n",
			    aligned ? "aligned" : "unaligned");
		}
		goto abort;
	}

	/*
	 * If the PCIe link width is 4 or less, we can use the aligned
	 * firmware and skip any checks
	 */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev, "PCIe x%d Link, "
		    "expect reduced performance\n", sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (mxge_firmware_probe(sc) == 0)
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return mxge_load_firmware(sc, 0);
}

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{
	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		if_printf(sc->ifp, "Bad firmware type: 0x%x\n",
		    be32toh(hdr->mcp_type));
		return EIO;
	}

	/* Save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (bootverbose)
		if_printf(sc->ifp, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	    &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR &&
	      sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		if_printf(sc->ifp, "Found firmware version %s\n",
		    sc->fw_version);
		if_printf(sc->ifp, "Driver needs %d.%d\n",
		    MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
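
/*
 * Allocator shims for the in-kernel zlib: inflateInit() lets the caller
 * supply its own allocation callbacks, and these simply forward to
 * kmalloc()/kfree() with the M_TEMP malloc type.
 */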
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	return kmalloc(items * size, M_TEMP, M_WAITOK);
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		if_printf(sc->ifp, "Could not find firmware image %s\n",
		    sc->fw_name);
		return ENOENT;
	}

	/* Setup zlib and decompress f/w */
	bzero(&zs, sizeof(zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/*
	 * The uncompressed size is stored as the firmware version,
	 * which would otherwise go unused
	 */
	fw_len = (size_t)fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK);
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		if_printf(sc->ifp, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* Check id */
	hdr_offset =
	    htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		if_printf(sc->ifp, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void *)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i,
		    min(256U, (unsigned)(fw_len - i)));
		wmb();
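		/*
		 * Read a byte back; presumably this forces the PIO
		 * writes above to be posted to the NIC before the
		 * next chunk is copied.
		 */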
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* Clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/*
	 * Send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
	buf[3] = htobe32(dma_high);	/* dummy addr MSW */
	buf[4] = htobe32(dma_low);	/* dummy addr LSW */
	buf[5] = htobe32(enable);	/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)\n",
		    (enable ? "enable" : "disable"), confirm, *confirm);
	}
}

static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* Ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);

	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof(*buf));

	/*
	 * Wait up to 20ms
	 */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			if_printf(sc->ifp, "command %d failed, result = %d\n",
			    cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN) {
		if_printf(sc->ifp, "command %d timed out result = %d\n",
		    cmd, be32toh(response->result));
	}
	return err;
}

static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof(struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/*
	 * Find running firmware header
	 */
	hdr_offset =
	    htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		if_printf(sc->ifp, "Running firmware has bad header offset "
		    "(%zu)\n", hdr_offset);
		return EIO;
	}

	/*
	 * Copy header of running firmware from SRAM to host memory to
	 * validate firmware
	 */
	hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * Check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		if_printf(sc->ifp, "Adopting fw %d.%d.%d: "
		    "working around rx filter bug\n",
		    sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny);
	}

	return status;
}

static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;

		/*
		 * Try to use the currently running firmware, if
		 * it is new enough
		 */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			if_printf(sc->ifp,
			    "failed to adopt running firmware\n");
			return status;
		}
		if_printf(sc->ifp, "Successfully adopted running firmware\n");

		if (sc->tx_boundary == 4096) {
			if_printf(sc->ifp,
			    "Using firmware currently running on NIC. "
			    "For optimal\n");
			if_printf(sc->ifp, "performance consider loading "
			    "optimized firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}

	/* Clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/*
	 * Send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address.  The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff.  However, the very first interfaces
	 * do not.  Therefore the handoff copy must skip the first 8 bytes
	 */
	/* where the code starts */
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
	}
	if (*confirm != 0xffffffff) {
		if_printf(sc->ifp, "handoff failed (%p = 0x%x)\n",
		    confirm, *confirm);
		return ENXIO;
	}
	return 0;
}

static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;

	cmd.data0 = (addr[0] << 24) | (addr[1] << 16) |
	    (addr[2] << 8) | addr[3];
	cmd.data1 = (addr[4] << 8) | (addr[5]);
	return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd);
	if (status) {
		if_printf(sc->ifp, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	bzero(&cmd, sizeof(cmd));	/* avoid gcc warning */
	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd);
	if (status)
		if_printf(sc->ifp, "Failed to set promisc mode\n");
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
		    "error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI) {
		/* Request to disable multicast filtering, so quit here */
		return;
	}

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
		    "error status: %d\n", err);
		return;
	}

	/*
	 * Walk the multicast list, and add each address
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		    &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			    "error status: %d\n", err);
			/* Abort, leaving multicast filtering off */
			return;
		}
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
		    "error status: %d\n", err);
	}
}

#if 0
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
	    &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
#endif

static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status, rx_intr_size;

	/*
	 * Try to send a reset command to the card to see if it
	 * is alive
	 */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		if_printf(sc->ifp, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/*
	 * Set the intrq size
	 * XXX assume 4byte mcp_slot
	 */
	rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
	cmd.data0 = rx_intr_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* Ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to get number of slices\n");
			return status;
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		if (sc->num_tx_rings > 1)
			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];

			rx_done = &ss->rx_data.rx_done;
			memset(rx_done->entry, 0, rx_intr_size);

			cmd.data0 =
			    MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data1 =
			    MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
			    &cmd);
		}
	}

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
	    &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);

	if (status != 0) {
		if_printf(sc->ifp, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* Run a DMA benchmark */
	mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);

		/* Reset mcp/driver shared state back to 0 */
		ss->rx_data.rx_done.idx = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->rx_data.rx_big.cnt = 0;
		ss->rx_data.rx_small.cnt = 0;
		if (ss->fw_stats != NULL)
			bzero(ss->fw_stats, sizeof(*ss->fw_stats));
	}
	sc->rdma_tags_available = 15;

	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);

	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
			if_printf(sc->ifp, "can't enable throttle\n");
	}
	return status;
}

static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0)
		return err;

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;

	ifnet_deserialize_all(sc->ifp);
	return err;
}

static int
mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	int err, use_rss;

	sc = arg1;
	use_rss = sc->use_rss;
	err = sysctl_handle_int(oidp, &use_rss, arg2, req);
	if (err != 0)
		return err;

	if (use_rss == sc->use_rss)
		return 0;

	ifnet_serialize_all(sc->ifp);

	sc->use_rss = use_rss;
	if (sc->ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc, 0);
		mxge_open(sc);
	}

	ifnet_deserialize_all(sc->ifp);
	return err;
}

static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0)
		return err;

	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	ifnet_deserialize_all(sc->ifp);
	return err;
}
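
/*
 * Sysctl handler for the firmware statistics block: the MCP keeps those
 * counters in network (big-endian) byte order, so swap to host order and
 * export the swapped value read-only via arg2.
 */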
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	if (sc->ss != NULL) {
		struct mxge_slice_state *ss;
		int slice;

		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];
			if (ss->sysctl_tree != NULL) {
				sysctl_ctx_free(&ss->sysctl_ctx);
				ss->sysctl_tree = NULL;
			}
		}
	}

	if (sc->slice_sysctl_tree != NULL) {
		sysctl_ctx_free(&sc->slice_sysctl_ctx);
		sc->slice_sysctl_tree = NULL;
	}
}

static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/*
	 * Random information
	 */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
	    CTLFLAG_RD, &sc->fw_version, 0, "firmware version");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number",
	    CTLFLAG_RD, &sc->serial_number_string, 0, "serial number");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code",
	    CTLFLAG_RD, &sc->product_code_string, 0, "product code");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width",
	    CTLFLAG_RD, &sc->link_width, 0, "link width");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
	    CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine",
	    CTLFLAG_RD, &sc->wc, 0, "write combining PIO");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
	    CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
	    CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
	    CTLFLAG_RD, &sc->read_write_dma, 0,
	    "DMA concurrent Read/Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
	    CTLFLAG_RD, &sc->watchdog_resets, 0,
	    "Number of times NIC was reset");

	/*
	 * Performance related tunables
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
	    "Interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
	    "Transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss",
	    CTLTYPE_INT|CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I",
	    "Use RSS");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
	    CTLFLAG_RW, &mxge_deassert_wait, 0,
	    "Wait for IRQ line to go low in ihandler");

	/*
	 * Stats block from firmware is in network byte order.
	 * Need to swap it
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 0,
	    mxge_handle_be32, "I", "link up");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 0,
	    mxge_handle_be32, "I", "rdma_tags_available");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_crc32, 0,
	    mxge_handle_be32, "I", "dropped_bad_crc32");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_bad_phy, 0,
	    mxge_handle_be32, "I", "dropped_bad_phy");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0,
	    mxge_handle_be32, "I", "dropped_link_error_or_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 0,
	    mxge_handle_be32, "I", "dropped_link_overflow");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_multicast_filtered, 0,
	    mxge_handle_be32, "I", "dropped_multicast_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_big_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_small_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_small_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 0,
	    mxge_handle_be32, "I", "dropped_overrun");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_pause, 0,
	    mxge_handle_be32, "I", "dropped_pause");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 0,
	    mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered",
	    CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 0,
	    mxge_handle_be32, "I", "dropped_unicast_filtered");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx,
	    children, OID_AUTO, "slice", CTLFLAG_RD, 0, "");
	if (sc->slice_sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add slice sysctl node\n");
		return;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    slice_num, CTLFLAG_RD, 0, "");
		if (ss->sysctl_tree == NULL) {
			device_printf(sc->dev,
			    "can't add %d slice sysctl node\n", slice);
			return;	/* XXX continue? */
		}
		children = SYSCTL_CHILDREN(ss->sysctl_tree);

		/*
		 * XXX change to ULONG
		 */
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt",
		    CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt",
		    CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_big_cnt");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req",
		    CTLFLAG_RD, &ss->tx.req, 0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done",
		    CTLFLAG_RD, &ss->tx.done, 0, "tx_done");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done",
		    CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_pkt_done");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active",
		    CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate",
		    CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate",
		    CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate");
	}
}

/*
 * Copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * backwards one at a time and handle ring wraps
 */
static __inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
    mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;

	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * Copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static __inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < cnt - 1; i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb();	/* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * Submit all but the first request, and ensure
		 * that it is submitted below
		 */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* Submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb();	/* barrier before setting valid flag */
	}

	/* Re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}

static int
mxge_pullup_tso(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}
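
/*
 * Note that mxge_encap_tso() below relies on the complete Ethernet/IP/TCP
 * header chain being contiguous in the leading mbuf; the pullup above
 * guarantees that.  The precise firmware-side requirement is not spelled
 * out here, but presumably the MCP needs to parse the whole header from
 * the start of the frame.
 */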

static int
mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
    struct mbuf *m, int busdma_seg_cnt)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	struct mxge_buffer_state *info_last;
	bus_dmamap_t map = info_map->map;

	mss = m->m_pkthdr.tso_segsz;

	/*
	 * Negative cum_len signifies to the send loop that we are
	 * still in the header portion of the TSO packet.
	 */
	cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
	    m->m_pkthdr.csum_thlen);

	/*
	 * TSO implies checksum offload on this hardware
	 */
	cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/*
	 * For TSO, pseudo_hdr_offset holds mss.  The firmware figures
	 * out where to put the checksum by parsing the header.
	 */
	pseudo_hdr_offset = htobe16(mss);

	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;

	/*
	 * "rdma_count" is the number of RDMAs belonging to the current
	 * packet BEFORE the current send request.  For non-TSO packets,
	 * this is equal to "count".
	 *
	 * For TSO packets, rdma_count needs to be reset to 0 after a
	 * segment cut.
	 *
	 * The rdma_count field of the send request is the number of
	 * RDMAs of the packet starting at that request.  For TSO send
	 * requests with one or more cuts in the middle, this is the
	 * number of RDMAs starting after the last cut in the request.
	 * All previous segments before the last cut implicitly have 1
	 * RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand, it must be
	 * filled-in retroactively - after each segmentation cut or at
	 * the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/*
		 * Break the busdma segment up into pieces
		 */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req - rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* Payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |=
				    next_is_first * MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* Header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags =
			    flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req - rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	return ENOBUFS;
}

static int
mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	bus_dmamap_t map;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;
	struct mxge_buffer_state *info_map, *info_last;

	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		err = mxge_pullup_tso(&m);
		if (__predict_false(err))
			return err;
	}

	/*
	 * Map the frame for DMA
	 */
	idx = tx->req & tx->mask;
	info_map = &tx->info[idx];
	map = info_map->map;

	err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
	    tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0))
		goto drop;
	bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);

	/*
	 * TSO is different enough, we handle it in another routine
	 */
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		return mxge_encap_tso(tx, info_map, m, cnt);

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/*
	 * Checksum offloading
	 */
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/*
	 * Convert segments into a request list
	 */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0;	/* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;

	/*
	 * Pad runt to 60 bytes
	 */
	if (cum_len < 60) {
		req++;
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0;	/* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	m_freem(m);
	return err;
}

static void
mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
	bus_addr_t zeropad;
	int encap = 0;

	KKASSERT(tx->ifsq == ifsq);
	ASSERT_SERIALIZED(&tx->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	zeropad = sc->zeropad_dma.dmem_busaddr;
	while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
		struct mbuf *m;
		int error;

		m = ifsq_dequeue(ifsq);
		if (m == NULL)
			goto done;

		BPF_MTAP(ifp, m);
		error = mxge_encap(tx, m, zeropad);
		if (!error)
			encap = 1;
		else
			IFNET_STAT_INC(ifp, oerrors, 1);
	}

	/* Ran out of transmit slots */
	ifsq_set_oactive(ifsq);
done:
	if (encap)
		tx->watchdog.wd_timer = 5;
}

static void
mxge_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct mxge_softc *sc = ifp->if_softc;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Check for pause blocking before resetting */
	if (tx->watchdog_rx_pause == rx_pause) {
		mxge_warn_stuck(sc, tx, 0);
		mxge_watchdog_reset(sc);
		return;
	} else {
		if_printf(ifp, "Flow control blocking xmits, "
		    "check link partner\n");
	}
	tx->watchdog_rx_pause = rx_pause;
}
2009 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2010 * at most 32 bytes at a time, so as to avoid involving the software
2011 * PIO handler in the NIC. We re-write the first segment's low
2012 * DMA address to mark it valid only after we write the entire chunk
2013 * in a burst.
2015 static __inline void
2016 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2017 mcp_kreq_ether_recv_t *src)
2019 uint32_t low;
2021 low = src->addr_low;
2022 src->addr_low = 0xffffffff;
2023 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2024 wmb();
2025 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2026 wmb();
2027 src->addr_low = low;
2028 dst->addr_low = low;
2029 wmb();
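#if 0
/*
 * Editor's sketch (hypothetical helper, not driver code): the
 * "write everything, then publish" pattern used by mxge_submit_8rx()
 * above.  An all-ones addr_low is used as a "not yet valid" marker,
 * so the first entry stays poisoned until the rest of the burst has
 * landed.
 */
static void
publish_after_burst(volatile uint32_t *dst, uint32_t *src, int n)
{
	uint32_t first = src[0];
	int i;

	dst[0] = 0xffffffff;		/* poison: entry not yet valid */
	for (i = 1; i < n; i++)
		dst[i] = src[i];
	wmb();				/* payload must land before publish */
	dst[0] = first;			/* publish: entry is now valid */
	wmb();
}
#endif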
2032 static int
2033 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2034 boolean_t init)
2036 bus_dma_segment_t seg;
2037 struct mbuf *m;
2038 int cnt, err, mflag;
2040 mflag = M_NOWAIT;
2041 if (__predict_false(init))
2042 mflag = M_WAITOK;
2044 m = m_gethdr(mflag, MT_DATA);
2045 if (m == NULL) {
2046 err = ENOBUFS;
2047 if (__predict_false(init)) {
2049 * During initialization, there
2050 * is nothing to set up; bail out
2052 return err;
2054 goto done;
2056 m->m_len = m->m_pkthdr.len = MHLEN;
2058 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2059 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2060 if (err != 0) {
2061 m_freem(m);
2062 if (__predict_false(init)) {
2064 * During initialization, there
2065 * is nothing to set up; bail out
2067 return err;
2069 goto done;
2072 rx->info[idx].m = m;
2073 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2074 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2076 done:
2077 if ((idx & 7) == 7)
2078 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2079 return err;
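/*
 * Editor's note: refills are batched; only every 8th slot
 * ((idx & 7) == 7) calls mxge_submit_8rx(), which pushes the
 * preceding 8 shadow entries (64 bytes) to the NIC in two 32-byte
 * bursts.
 */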
2082 static int
2083 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2084 boolean_t init)
2086 bus_dma_segment_t seg;
2087 struct mbuf *m;
2088 int cnt, err, mflag;
2090 mflag = M_NOWAIT;
2091 if (__predict_false(init))
2092 mflag = M_WAITOK;
2094 if (rx->cl_size == MCLBYTES)
2095 m = m_getcl(mflag, MT_DATA, M_PKTHDR);
2096 else
2097 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
2098 if (m == NULL) {
2099 err = ENOBUFS;
2100 if (__predict_false(init)) {
2102 * During initialization, there
2103 * is nothing to set up; bail out
2105 return err;
2107 goto done;
2109 m->m_len = m->m_pkthdr.len = rx->cl_size;
2111 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2112 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2113 if (err != 0) {
2114 m_freem(m);
2115 if (__predict_false(init)) {
2117 * During initialization, there
2118 * is nothing to set up; bail out
2120 return err;
2122 goto done;
2125 rx->info[idx].m = m;
2126 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2127 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2129 done:
2130 if ((idx & 7) == 7)
2131 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2132 return err;
2136 * Myri10GE hardware checksums are not valid if the sender
2137 * padded the frame with non-zero padding. This is because
2138 * the firmware just does a simple 16-bit 1s complement
2139 * checksum across the entire frame, excluding the first 14
2140 bytes. It is best to simply check the checksum and
2141 * tell the stack about it only if the checksum is good
2143 static __inline uint16_t
2144 mxge_rx_csum(struct mbuf *m, int csum)
2146 const struct ether_header *eh;
2147 const struct ip *ip;
2148 uint16_t c;
2150 eh = mtod(m, const struct ether_header *);
2152 /* Only deal with IPv4 TCP & UDP for now */
2153 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2154 return 1;
2156 ip = (const struct ip *)(eh + 1);
2157 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
2158 return 1;
2160 #ifdef INET
2161 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2162 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2163 (ip->ip_hl << 2) + ip->ip_p));
2164 #else
2165 c = 1;
2166 #endif
2167 c ^= 0xffff;
2168 return c;
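#if 0
/*
 * Editor's sketch (hypothetical helper, not driver code): the
 * firmware supplies a 1s-complement sum of the frame past the
 * Ethernet header; in_pseudo() above folds the IPv4 pseudo-header
 * into it.  A sum that verifies folds to all-ones, which is why
 * mxge_rx_csum() returns c ^ 0xffff == 0 for a good checksum.
 */
static int
csum_verifies(uint32_t sum)
{
	sum = (sum >> 16) + (sum & 0xffff);	/* fold carries */
	sum = (sum >> 16) + (sum & 0xffff);	/* fold any new carry */
	return (sum == 0xffff);
}
#endif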
2171 static void
2172 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2174 struct ether_vlan_header *evl;
2175 uint32_t partial;
2177 evl = mtod(m, struct ether_vlan_header *);
2180 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2181 * what the firmware thought was the end of the ethernet
2182 * header.
2185 /* Put checksum into host byte order */
2186 *csum = ntohs(*csum);
2188 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2189 *csum += ~partial;
2190 *csum += ((*csum) < ~partial);
2191 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2192 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2195 * Restore checksum to network byte order;
2196 * later consumers expect this
2198 *csum = htons(*csum);
2200 /* save the tag */
2201 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2202 m->m_flags |= M_VLANTAG;
2205 * Remove the 802.1q header by copying the Ethernet
2206 * addresses over it and adjusting the beginning of
2207 * the data in the mbuf. The encapsulated Ethernet
2208 * type field is already in place.
2210 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2211 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2212 m_adj(m, EVL_ENCAPLEN);
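#if 0
/*
 * Editor's sketch (hypothetical helper, not driver code): the
 * 1s-complement arithmetic used above.  Subtracting the 4 VLAN bytes
 * from the running checksum is done by adding their complement with
 * the end-around carry, then folding back to 16 bits.
 */
static uint32_t
cksum_sub32(uint32_t sum, uint32_t val)
{
	sum += ~val;			/* add the complement ... */
	sum += (sum < ~val);		/* ... plus the end-around carry */
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;
}
#endif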
2216 static __inline void
2217 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx,
2218 uint32_t len, uint32_t csum)
2220 struct mbuf *m;
2221 const struct ether_header *eh;
2222 bus_dmamap_t old_map;
2223 int idx;
2225 idx = rx->cnt & rx->mask;
2226 rx->cnt++;
2228 /* Save a pointer to the received mbuf */
2229 m = rx->info[idx].m;
2231 /* Try to replace the received mbuf */
2232 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) {
2233 /* Drop the frame -- the old mbuf is recycled */
2234 IFNET_STAT_INC(ifp, ierrors, 1);
2235 return;
2238 /* Unmap the received buffer */
2239 old_map = rx->info[idx].map;
2240 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2241 bus_dmamap_unload(rx->dmat, old_map);
2243 /* Swap the bus_dmamap_t's */
2244 rx->info[idx].map = rx->extra_map;
2245 rx->extra_map = old_map;
2248 * The mcp implicitly skips the first 2 bytes so that the
2249 * packet is properly aligned.
2251 m->m_data += MXGEFW_PAD;
2253 m->m_pkthdr.rcvif = ifp;
2254 m->m_len = m->m_pkthdr.len = len;
2256 IFNET_STAT_INC(ifp, ipackets, 1);
2258 eh = mtod(m, const struct ether_header *);
2259 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2260 mxge_vlan_tag_remove(m, &csum);
2262 /* If the checksum is valid, mark it in the mbuf header */
2263 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2264 mxge_rx_csum(m, csum) == 0) {
2265 /* Tell the stack that the checksum is good */
2266 m->m_pkthdr.csum_data = 0xffff;
2267 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2268 CSUM_DATA_VALID;
2270 ifp->if_input(ifp, m, NULL, -1);
2273 static __inline void
2274 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx,
2275 uint32_t len, uint32_t csum)
2277 const struct ether_header *eh;
2278 struct mbuf *m;
2279 bus_dmamap_t old_map;
2280 int idx;
2282 idx = rx->cnt & rx->mask;
2283 rx->cnt++;
2285 /* Save a pointer to the received mbuf */
2286 m = rx->info[idx].m;
2288 /* Try to replace the received mbuf */
2289 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) {
2290 /* Drop the frame -- the old mbuf is recycled */
2291 IFNET_STAT_INC(ifp, ierrors, 1);
2292 return;
2295 /* Unmap the received buffer */
2296 old_map = rx->info[idx].map;
2297 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2298 bus_dmamap_unload(rx->dmat, old_map);
2300 /* Swap the bus_dmamap_t's */
2301 rx->info[idx].map = rx->extra_map;
2302 rx->extra_map = old_map;
2305 * The mcp implicitly skips the first 2 bytes so that the
2306 * packet is properly aligned.
2308 m->m_data += MXGEFW_PAD;
2310 m->m_pkthdr.rcvif = ifp;
2311 m->m_len = m->m_pkthdr.len = len;
2313 IFNET_STAT_INC(ifp, ipackets, 1);
2315 eh = mtod(m, const struct ether_header *);
2316 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2317 mxge_vlan_tag_remove(m, &csum);
2319 /* If the checksum is valid, mark it in the mbuf header */
2320 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2321 mxge_rx_csum(m, csum) == 0) {
2322 /* Tell the stack that the checksum is good */
2323 m->m_pkthdr.csum_data = 0xffff;
2324 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2325 CSUM_DATA_VALID;
2327 ifp->if_input(ifp, m, NULL, -1);
2330 static __inline void
2331 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle)
2333 mxge_rx_done_t *rx_done = &rx_data->rx_done;
2335 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
2336 uint16_t length, checksum;
2338 length = ntohs(rx_done->entry[rx_done->idx].length);
2339 rx_done->entry[rx_done->idx].length = 0;
2341 checksum = rx_done->entry[rx_done->idx].checksum;
2343 if (length <= MXGE_RX_SMALL_BUFLEN) {
2344 mxge_rx_done_small(ifp, &rx_data->rx_small,
2345 length, checksum);
2346 } else {
2347 mxge_rx_done_big(ifp, &rx_data->rx_big,
2348 length, checksum);
2351 rx_done->idx++;
2352 rx_done->idx &= rx_done->mask;
2353 --cycle;
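/*
 * Editor's note: a negative cycle (the interrupt paths pass -1)
 * never reaches 0, so the loop above drains the ring completely;
 * mxge_npoll_rx() passes its burst budget instead, bounding the
 * work done per poll.
 */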
2357 static __inline void
2358 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
2360 ASSERT_SERIALIZED(&tx->tx_serialize);
2362 while (tx->pkt_done != mcp_idx) {
2363 struct mbuf *m;
2364 int idx;
2366 idx = tx->done & tx->mask;
2367 tx->done++;
2369 m = tx->info[idx].m;
2371 * mbuf and DMA map only attached to the first
2372 * segment per-mbuf.
2374 if (m != NULL) {
2375 tx->pkt_done++;
2376 IFNET_STAT_INC(ifp, opackets, 1);
2377 tx->info[idx].m = NULL;
2378 bus_dmamap_unload(tx->dmat, tx->info[idx].map);
2379 m_freem(m);
2384 * If we have space, clear OACTIVE to tell the stack that
2385 * it's OK to send packets
2387 if (tx->req - tx->done < (tx->mask + 1) / 2) {
2388 ifsq_clr_oactive(tx->ifsq);
2389 if (tx->req == tx->done) {
2390 /* Reset watchdog */
2391 tx->watchdog.wd_timer = 0;
2395 if (!ifsq_is_empty(tx->ifsq))
2396 ifsq_devstart(tx->ifsq);
2398 if (tx->send_stop != NULL && tx->req == tx->done) {
2400 * Let the NIC stop polling this queue, since there
2401 * are no more transmits pending
2403 *tx->send_stop = 1;
2404 tx->queue_active = 0;
2405 tx->deactivate++;
2406 wmb();
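/*
 * Editor's note: tx->done counts reclaimed descriptors while
 * tx->pkt_done counts completed packets -- a packet mapped to three
 * descriptors advances done by 3 but pkt_done by 1, since only one
 * slot per packet carries the mbuf.  OACTIVE is cleared once less
 * than half of the ring is in flight.
 */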
2410 static struct mxge_media_type mxge_xfp_media_types[] = {
2411 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2412 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2413 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2414 {IFM_NONE, (1 << 5), "10GBASE-ER"},
2415 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2416 {IFM_NONE, (1 << 3), "10GBASE-SW"},
2417 {IFM_NONE, (1 << 2), "10GBASE-LW"},
2418 {IFM_NONE, (1 << 1), "10GBASE-EW"},
2419 {IFM_NONE, (1 << 0), "Reserved"}
2422 static struct mxge_media_type mxge_sfp_media_types[] = {
2423 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2424 {IFM_NONE, (1 << 7), "Reserved"},
2425 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2426 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2427 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2428 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2431 static void
2432 mxge_media_set(mxge_softc_t *sc, int media_type)
2434 int fc_opt = 0;
2436 if (media_type == IFM_NONE)
2437 return;
2439 if (sc->pause)
2440 fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
2442 ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL);
2443 ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt);
2445 sc->current_media = media_type;
2448 static void
2449 mxge_media_unset(mxge_softc_t *sc)
2451 ifmedia_removeall(&sc->media);
2452 sc->current_media = IFM_NONE;
2455 static void
2456 mxge_media_init(mxge_softc_t *sc)
2458 const char *ptr;
2459 int i;
2461 mxge_media_unset(sc);
2464 * Parse the product code to determine the interface type
2465 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2466 * after the 3rd dash in the driver's cached copy of the
2467 * EEPROM's product code string.
2469 ptr = sc->product_code_string;
2470 if (ptr == NULL) {
2471 if_printf(sc->ifp, "Missing product code\n");
2472 return;
2475 for (i = 0; i < 3; i++, ptr++) {
2476 ptr = strchr(ptr, '-');
2477 if (ptr == NULL) {
2478 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
2479 return;
2482 if (*ptr == 'C' || *(ptr +1) == 'C') {
2483 /* -C is CX4 */
2484 sc->connector = MXGE_CX4;
2485 mxge_media_set(sc, IFM_10G_CX4);
2486 } else if (*ptr == 'Q') {
2487 /* -Q is Quad Ribbon Fiber */
2488 sc->connector = MXGE_QRF;
2489 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
2490 /* DragonFly has no media type for Quad ribbon fiber */
2491 } else if (*ptr == 'R') {
2492 /* -R is XFP */
2493 sc->connector = MXGE_XFP;
2494 /* NOTE: ifmedia will be installed later */
2495 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2496 /* -S or -2S is SFP+ */
2497 sc->connector = MXGE_SFP;
2498 /* NOTE: ifmedia will be installed later */
2499 } else {
2500 sc->connector = MXGE_UNK;
2501 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
2506 * Determine the media type for a NIC. Some XFPs will identify
2507 * themselves only when their link is up, so this is initiated via a
2508 * link up interrupt. However, this can potentially take up to
2509 * several milliseconds, so it is run via the watchdog routine, rather
2510 * than in the interrupt handler itself.
2512 static void
2513 mxge_media_probe(mxge_softc_t *sc)
2515 mxge_cmd_t cmd;
2516 const char *cage_type;
2517 struct mxge_media_type *mxge_media_types = NULL;
2518 int i, err, ms, mxge_media_type_entries;
2519 uint32_t byte;
2521 sc->need_media_probe = 0;
2523 if (sc->connector == MXGE_XFP) {
2524 /* -R is XFP */
2525 mxge_media_types = mxge_xfp_media_types;
2526 mxge_media_type_entries = NELEM(mxge_xfp_media_types);
2527 byte = MXGE_XFP_COMPLIANCE_BYTE;
2528 cage_type = "XFP";
2529 } else if (sc->connector == MXGE_SFP) {
2530 /* -S or -2S is SFP+ */
2531 mxge_media_types = mxge_sfp_media_types;
2532 mxge_media_type_entries = NELEM(mxge_sfp_media_types);
2533 cage_type = "SFP+";
2534 byte = 3;
2535 } else {
2536 /* nothing to do; media type cannot change */
2537 return;
2541 * At this point we know the NIC has an XFP cage, so now we
2542 * try to determine what is in the cage by using the
2543 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2544 * register. We read just one byte, which may take over
2545 * a millisecond.
2548 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
2549 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2550 cmd.data1 = byte;
2551 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2552 if (err != MXGEFW_CMD_OK) {
2553 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE)
2554 if_printf(sc->ifp, "failed to read XFP\n");
2555 else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT)
2556 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n");
2557 else
2558 if_printf(sc->ifp, "I2C read failed, err: %d", err);
2559 mxge_media_unset(sc);
2560 return;
2563 /* Now we wait for the data to be cached */
2564 cmd.data0 = byte;
2565 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2566 for (ms = 0; err == EBUSY && ms < 50; ms++) {
2567 DELAY(1000);
2568 cmd.data0 = byte;
2569 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2571 if (err != MXGEFW_CMD_OK) {
2572 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n",
2573 cage_type, err, ms);
2574 mxge_media_unset(sc);
2575 return;
2578 if (cmd.data0 == mxge_media_types[0].bitmask) {
2579 if (bootverbose) {
2580 if_printf(sc->ifp, "%s:%s\n", cage_type,
2581 mxge_media_types[0].name);
2583 if (sc->current_media != mxge_media_types[0].flag) {
2584 mxge_media_unset(sc);
2585 mxge_media_set(sc, mxge_media_types[0].flag);
2587 return;
2589 for (i = 1; i < mxge_media_type_entries; i++) {
2590 if (cmd.data0 & mxge_media_types[i].bitmask) {
2591 if (bootverbose) {
2592 if_printf(sc->ifp, "%s:%s\n", cage_type,
2593 mxge_media_types[i].name);
2596 if (sc->current_media != mxge_media_types[i].flag) {
2597 mxge_media_unset(sc);
2598 mxge_media_set(sc, mxge_media_types[i].flag);
2600 return;
2603 mxge_media_unset(sc);
2604 if (bootverbose) {
2605 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type,
2606 cmd.data0);
2610 static void
2611 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats)
2613 if (sc->link_state != stats->link_up) {
2614 sc->link_state = stats->link_up;
2615 if (sc->link_state) {
2616 sc->ifp->if_link_state = LINK_STATE_UP;
2617 if_link_state_change(sc->ifp);
2618 if (bootverbose)
2619 if_printf(sc->ifp, "link up\n");
2620 } else {
2621 sc->ifp->if_link_state = LINK_STATE_DOWN;
2622 if_link_state_change(sc->ifp);
2623 if (bootverbose)
2624 if_printf(sc->ifp, "link down\n");
2626 sc->need_media_probe = 1;
2629 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) {
2630 sc->rdma_tags_available = be32toh(stats->rdma_tags_available);
2631 if_printf(sc->ifp, "RDMA timed out! %d tags left\n",
2632 sc->rdma_tags_available);
2635 if (stats->link_down) {
2636 sc->down_cnt += stats->link_down;
2637 sc->link_state = 0;
2638 sc->ifp->if_link_state = LINK_STATE_DOWN;
2639 if_link_state_change(sc->ifp);
2643 static void
2644 mxge_serialize_skipmain(struct mxge_softc *sc)
2646 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
2649 static void
2650 mxge_deserialize_skipmain(struct mxge_softc *sc)
2652 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
2655 static void
2656 mxge_legacy(void *arg)
2658 struct mxge_slice_state *ss = arg;
2659 mxge_softc_t *sc = ss->sc;
2660 mcp_irq_data_t *stats = ss->fw_stats;
2661 mxge_tx_ring_t *tx = &ss->tx;
2662 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2663 uint32_t send_done_count;
2664 uint8_t valid;
2666 ASSERT_SERIALIZED(&sc->main_serialize);
2668 /* Make sure the DMA has finished */
2669 if (!stats->valid)
2670 return;
2671 valid = stats->valid;
2673 /* Lower legacy IRQ */
2674 *sc->irq_deassert = 0;
2675 if (!mxge_deassert_wait) {
2676 /* Don't wait for conf. that irq is low */
2677 stats->valid = 0;
2680 mxge_serialize_skipmain(sc);
2683 * Loop while waiting for legacy irq deassertion
2684 * XXX do we really want to loop?
2686 do {
2687 /* Check for transmit completes and receives */
2688 send_done_count = be32toh(stats->send_done_count);
2689 while ((send_done_count != tx->pkt_done) ||
2690 (rx_done->entry[rx_done->idx].length != 0)) {
2691 if (send_done_count != tx->pkt_done) {
2692 mxge_tx_done(&sc->arpcom.ac_if, tx,
2693 (int)send_done_count);
2695 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2696 send_done_count = be32toh(stats->send_done_count);
2698 if (mxge_deassert_wait)
2699 wmb();
2700 } while (*((volatile uint8_t *)&stats->valid));
2702 mxge_deserialize_skipmain(sc);
2704 /* Fw link & error stats are meaningful only on the first slice */
2705 if (__predict_false(stats->stats_updated))
2706 mxge_intr_status(sc, stats);
2708 /* Check to see if we have rx token to pass back */
2709 if (valid & 0x1)
2710 *ss->irq_claim = be32toh(3);
2711 *(ss->irq_claim + 1) = be32toh(3);
2714 static void
2715 mxge_msi(void *arg)
2717 struct mxge_slice_state *ss = arg;
2718 mxge_softc_t *sc = ss->sc;
2719 mcp_irq_data_t *stats = ss->fw_stats;
2720 mxge_tx_ring_t *tx = &ss->tx;
2721 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2722 uint32_t send_done_count;
2723 uint8_t valid;
2724 #ifndef IFPOLL_ENABLE
2725 const boolean_t polling = FALSE;
2726 #else
2727 boolean_t polling = FALSE;
2728 #endif
2730 ASSERT_SERIALIZED(&sc->main_serialize);
2732 /* Make sure the DMA has finished */
2733 if (__predict_false(!stats->valid))
2734 return;
2736 valid = stats->valid;
2737 stats->valid = 0;
2739 #ifdef IFPOLL_ENABLE
2740 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2741 polling = TRUE;
2742 #endif
2744 if (!polling) {
2745 /* Check for receives */
2746 lwkt_serialize_enter(&ss->rx_data.rx_serialize);
2747 if (rx_done->entry[rx_done->idx].length != 0)
2748 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2749 lwkt_serialize_exit(&ss->rx_data.rx_serialize);
2753 * Check for transmit completes
2755 * NOTE:
2756 * Since pkt_done is only changed by mxge_tx_done(),
2757 * which is called only in interrupt handler, the
2758 * check w/o holding tx serializer is MPSAFE.
2760 send_done_count = be32toh(stats->send_done_count);
2761 if (send_done_count != tx->pkt_done) {
2762 lwkt_serialize_enter(&tx->tx_serialize);
2763 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2764 lwkt_serialize_exit(&tx->tx_serialize);
2767 if (__predict_false(stats->stats_updated))
2768 mxge_intr_status(sc, stats);
2770 /* Check to see if we have rx token to pass back */
2771 if (!polling && (valid & 0x1))
2772 *ss->irq_claim = be32toh(3);
2773 *(ss->irq_claim + 1) = be32toh(3);
2776 static void
2777 mxge_msix_rx(void *arg)
2779 struct mxge_slice_state *ss = arg;
2780 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2782 #ifdef IFPOLL_ENABLE
2783 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2784 return;
2785 #endif
2787 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2789 if (rx_done->entry[rx_done->idx].length != 0)
2790 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1);
2792 *ss->irq_claim = be32toh(3);
2795 static void
2796 mxge_msix_rxtx(void *arg)
2798 struct mxge_slice_state *ss = arg;
2799 mxge_softc_t *sc = ss->sc;
2800 mcp_irq_data_t *stats = ss->fw_stats;
2801 mxge_tx_ring_t *tx = &ss->tx;
2802 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2803 uint32_t send_done_count;
2804 uint8_t valid;
2805 #ifndef IFPOLL_ENABLE
2806 const boolean_t polling = FALSE;
2807 #else
2808 boolean_t polling = FALSE;
2809 #endif
2811 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2813 /* Make sure the DMA has finished */
2814 if (__predict_false(!stats->valid))
2815 return;
2817 valid = stats->valid;
2818 stats->valid = 0;
2820 #ifdef IFPOLL_ENABLE
2821 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2822 polling = TRUE;
2823 #endif
2825 /* Check for receives */
2826 if (!polling && rx_done->entry[rx_done->idx].length != 0)
2827 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2830 * Check for transmit completes
2832 * NOTE:
2833 * Since pkt_done is only changed by mxge_tx_done(),
2834 * which is called only in interrupt handler, the
2835 * check w/o holding tx serializer is MPSAFE.
2837 send_done_count = be32toh(stats->send_done_count);
2838 if (send_done_count != tx->pkt_done) {
2839 lwkt_serialize_enter(&tx->tx_serialize);
2840 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2841 lwkt_serialize_exit(&tx->tx_serialize);
2844 /* Check to see if we have rx token to pass back */
2845 if (!polling && (valid & 0x1))
2846 *ss->irq_claim = be32toh(3);
2847 *(ss->irq_claim + 1) = be32toh(3);
2850 static void
2851 mxge_init(void *arg)
2853 struct mxge_softc *sc = arg;
2855 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp);
2856 if ((sc->ifp->if_flags & IFF_RUNNING) == 0)
2857 mxge_open(sc);
2860 static void
2861 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2863 int i;
2865 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2866 if (ss->rx_data.rx_big.info[i].m == NULL)
2867 continue;
2868 bus_dmamap_unload(ss->rx_data.rx_big.dmat,
2869 ss->rx_data.rx_big.info[i].map);
2870 m_freem(ss->rx_data.rx_big.info[i].m);
2871 ss->rx_data.rx_big.info[i].m = NULL;
2874 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2875 if (ss->rx_data.rx_small.info[i].m == NULL)
2876 continue;
2877 bus_dmamap_unload(ss->rx_data.rx_small.dmat,
2878 ss->rx_data.rx_small.info[i].map);
2879 m_freem(ss->rx_data.rx_small.info[i].m);
2880 ss->rx_data.rx_small.info[i].m = NULL;
2883 /* Transmit ring used only on the first slice */
2884 if (ss->tx.info == NULL)
2885 return;
2887 for (i = 0; i <= ss->tx.mask; i++) {
2888 if (ss->tx.info[i].m == NULL)
2889 continue;
2890 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map);
2891 m_freem(ss->tx.info[i].m);
2892 ss->tx.info[i].m = NULL;
2896 static void
2897 mxge_free_mbufs(mxge_softc_t *sc)
2899 int slice;
2901 for (slice = 0; slice < sc->num_slices; slice++)
2902 mxge_free_slice_mbufs(&sc->ss[slice]);
2905 static void
2906 mxge_free_slice_rings(struct mxge_slice_state *ss)
2908 int i;
2910 if (ss->rx_data.rx_done.entry != NULL) {
2911 mxge_dma_free(&ss->rx_done_dma);
2912 ss->rx_data.rx_done.entry = NULL;
2915 if (ss->tx.req_list != NULL) {
2916 kfree(ss->tx.req_list, M_DEVBUF);
2917 ss->tx.req_list = NULL;
2920 if (ss->tx.seg_list != NULL) {
2921 kfree(ss->tx.seg_list, M_DEVBUF);
2922 ss->tx.seg_list = NULL;
2925 if (ss->rx_data.rx_small.shadow != NULL) {
2926 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF);
2927 ss->rx_data.rx_small.shadow = NULL;
2930 if (ss->rx_data.rx_big.shadow != NULL) {
2931 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF);
2932 ss->rx_data.rx_big.shadow = NULL;
2935 if (ss->tx.info != NULL) {
2936 if (ss->tx.dmat != NULL) {
2937 for (i = 0; i <= ss->tx.mask; i++) {
2938 bus_dmamap_destroy(ss->tx.dmat,
2939 ss->tx.info[i].map);
2941 bus_dma_tag_destroy(ss->tx.dmat);
2943 kfree(ss->tx.info, M_DEVBUF);
2944 ss->tx.info = NULL;
2947 if (ss->rx_data.rx_small.info != NULL) {
2948 if (ss->rx_data.rx_small.dmat != NULL) {
2949 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2950 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2951 ss->rx_data.rx_small.info[i].map);
2953 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2954 ss->rx_data.rx_small.extra_map);
2955 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
2957 kfree(ss->rx_data.rx_small.info, M_DEVBUF);
2958 ss->rx_data.rx_small.info = NULL;
2961 if (ss->rx_data.rx_big.info != NULL) {
2962 if (ss->rx_data.rx_big.dmat != NULL) {
2963 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2964 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2965 ss->rx_data.rx_big.info[i].map);
2967 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2968 ss->rx_data.rx_big.extra_map);
2969 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
2971 kfree(ss->rx_data.rx_big.info, M_DEVBUF);
2972 ss->rx_data.rx_big.info = NULL;
2976 static void
2977 mxge_free_rings(mxge_softc_t *sc)
2979 int slice;
2981 if (sc->ss == NULL)
2982 return;
2984 for (slice = 0; slice < sc->num_slices; slice++)
2985 mxge_free_slice_rings(&sc->ss[slice]);
2988 static int
2989 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2990 int tx_ring_entries)
2992 mxge_softc_t *sc = ss->sc;
2993 size_t bytes;
2994 int err, i;
2997 * Allocate per-slice receive resources
3000 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask =
3001 rx_ring_entries - 1;
3002 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1;
3004 /* Allocate the rx shadow rings */
3005 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow);
3006 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3008 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow);
3009 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3011 /* Allocate the rx host info rings */
3012 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info);
3013 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3015 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info);
3016 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3018 /* Allocate the rx busdma resources */
3019 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3020 1, /* alignment */
3021 4096, /* boundary */
3022 BUS_SPACE_MAXADDR, /* low */
3023 BUS_SPACE_MAXADDR, /* high */
3024 NULL, NULL, /* filter */
3025 MHLEN, /* maxsize */
3026 1, /* num segs */
3027 MHLEN, /* maxsegsize */
3028 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3029 /* flags */
3030 &ss->rx_data.rx_small.dmat); /* tag */
3031 if (err != 0) {
3032 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3033 err);
3034 return err;
3037 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK,
3038 &ss->rx_data.rx_small.extra_map);
3039 if (err != 0) {
3040 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err);
3041 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3042 ss->rx_data.rx_small.dmat = NULL;
3043 return err;
3045 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3046 err = bus_dmamap_create(ss->rx_data.rx_small.dmat,
3047 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map);
3048 if (err != 0) {
3049 int j;
3051 device_printf(sc->dev, "Err %d rx_small dmamap\n", err);
3053 for (j = 0; j < i; ++j) {
3054 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3055 ss->rx_data.rx_small.info[j].map);
3057 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3058 ss->rx_data.rx_small.extra_map);
3059 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3060 ss->rx_data.rx_small.dmat = NULL;
3061 return err;
3065 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3066 1, /* alignment */
3067 4096, /* boundary */
3068 BUS_SPACE_MAXADDR, /* low */
3069 BUS_SPACE_MAXADDR, /* high */
3070 NULL, NULL, /* filter */
3071 4096, /* maxsize */
3072 1, /* num segs */
3073 4096, /* maxsegsize*/
3074 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3075 /* flags */
3076 &ss->rx_data.rx_big.dmat); /* tag */
3077 if (err != 0) {
3078 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3079 err);
3080 return err;
3083 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3084 &ss->rx_data.rx_big.extra_map);
3085 if (err != 0) {
3086 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err);
3087 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3088 ss->rx_data.rx_big.dmat = NULL;
3089 return err;
3091 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3092 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3093 &ss->rx_data.rx_big.info[i].map);
3094 if (err != 0) {
3095 int j;
3097 device_printf(sc->dev, "Err %d rx_big dmamap\n", err);
3098 for (j = 0; j < i; ++j) {
3099 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3100 ss->rx_data.rx_big.info[j].map);
3102 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3103 ss->rx_data.rx_big.extra_map);
3104 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3105 ss->rx_data.rx_big.dmat = NULL;
3106 return err;
3111 * Now allocate TX resources
3114 ss->tx.mask = tx_ring_entries - 1;
3115 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3118 * Allocate the tx request copy block; MUST be at least 8 bytes
3119 * aligned
3121 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4);
3122 ss->tx.req_list = kmalloc_cachealign(__VM_CACHELINE_ALIGN(bytes),
3123 M_DEVBUF, M_WAITOK);
3125 /* Allocate the tx busdma segment list */
3126 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc;
3127 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3129 /* Allocate the tx host info ring */
3130 bytes = tx_ring_entries * sizeof(*ss->tx.info);
3131 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3133 /* Allocate the tx busdma resources */
3134 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3135 1, /* alignment */
3136 sc->tx_boundary, /* boundary */
3137 BUS_SPACE_MAXADDR, /* low */
3138 BUS_SPACE_MAXADDR, /* high */
3139 NULL, NULL, /* filter */
3140 IP_MAXPACKET +
3141 sizeof(struct ether_vlan_header),
3142 /* maxsize */
3143 ss->tx.max_desc - 2, /* num segs */
3144 sc->tx_boundary, /* maxsegsz */
3145 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
3146 BUS_DMA_ONEBPAGE, /* flags */
3147 &ss->tx.dmat); /* tag */
3148 if (err != 0) {
3149 device_printf(sc->dev, "Err %d allocating tx dmat\n", err);
3150 return err;
3154 * Now use these tags to setup DMA maps for each slot in the ring
3156 for (i = 0; i <= ss->tx.mask; i++) {
3157 err = bus_dmamap_create(ss->tx.dmat,
3158 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map);
3159 if (err != 0) {
3160 int j;
3162 device_printf(sc->dev, "Err %d tx dmamap\n", err);
3163 for (j = 0; j < i; ++j) {
3164 bus_dmamap_destroy(ss->tx.dmat,
3165 ss->tx.info[j].map);
3167 bus_dma_tag_destroy(ss->tx.dmat);
3168 ss->tx.dmat = NULL;
3169 return err;
3172 return 0;
3175 static int
3176 mxge_alloc_rings(mxge_softc_t *sc)
3178 mxge_cmd_t cmd;
3179 int tx_ring_size;
3180 int tx_ring_entries, rx_ring_entries;
3181 int err, slice;
3183 /* Get ring sizes */
3184 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3185 if (err != 0) {
3186 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3187 return err;
3189 tx_ring_size = cmd.data0;
3191 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t);
3192 rx_ring_entries = sc->rx_intr_slots / 2;
3194 if (bootverbose) {
3195 device_printf(sc->dev, "tx desc %d, rx desc %d\n",
3196 tx_ring_entries, rx_ring_entries);
3199 sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices;
3200 sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters;
3202 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3203 ifq_set_ready(&sc->ifp->if_snd);
3204 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings);
3206 if (sc->num_tx_rings > 1) {
3207 sc->ifp->if_mapsubq = ifq_mapsubq_mask;
3208 ifq_set_subq_mask(&sc->ifp->if_snd, sc->num_tx_rings - 1);
3211 for (slice = 0; slice < sc->num_slices; slice++) {
3212 err = mxge_alloc_slice_rings(&sc->ss[slice],
3213 rx_ring_entries, tx_ring_entries);
3214 if (err != 0) {
3215 device_printf(sc->dev,
3216 "alloc %d slice rings failed\n", slice);
3217 return err;
3220 return 0;
3223 static void
3224 mxge_choose_params(int mtu, int *cl_size)
3226 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3228 if (bufsize < MCLBYTES) {
3229 *cl_size = MCLBYTES;
3230 } else {
3231 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu));
3232 *cl_size = MJUMPAGESIZE;
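/*
 * Editor's example: with the default MTU of 1500 the receive buffer
 * must hold 1500 + ETHER_HDR_LEN (14) + EVL_ENCAPLEN (4) +
 * MXGEFW_PAD (2) = 1520 bytes, which fits a regular MCLBYTES
 * (2048-byte) cluster; a 3000-byte MTU yields 3020 and selects a
 * page-sized MJUMPAGESIZE cluster instead.
 */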
3236 static int
3237 mxge_slice_open(struct mxge_slice_state *ss, int cl_size)
3239 mxge_cmd_t cmd;
3240 int err, i, slice;
3242 slice = ss - ss->sc->ss;
3245 * Get the lanai pointers to the send and receive rings
3247 err = 0;
3249 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
3250 if (ss->sc->num_tx_rings == 1) {
3251 if (slice == 0) {
3252 cmd.data0 = slice;
3253 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET,
3254 &cmd);
3255 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3256 (ss->sc->sram + cmd.data0);
3257 /* Leave send_go and send_stop as NULL */
3259 } else {
3260 cmd.data0 = slice;
3261 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3262 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3263 (ss->sc->sram + cmd.data0);
3264 ss->tx.send_go = (volatile uint32_t *)
3265 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3266 ss->tx.send_stop = (volatile uint32_t *)
3267 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3270 cmd.data0 = slice;
3271 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3272 ss->rx_data.rx_small.lanai =
3273 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3275 cmd.data0 = slice;
3276 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3277 ss->rx_data.rx_big.lanai =
3278 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3280 if (err != 0) {
3281 if_printf(ss->sc->ifp,
3282 "failed to get ring sizes or locations\n");
3283 return EIO;
3287 * Stock small receive ring
3289 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3290 err = mxge_get_buf_small(&ss->rx_data.rx_small,
3291 ss->rx_data.rx_small.info[i].map, i, TRUE);
3292 if (err) {
3293 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i,
3294 ss->rx_data.rx_small.mask + 1);
3295 return ENOMEM;
3300 * Stock big receive ring
3302 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3303 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff;
3304 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff;
3307 ss->rx_data.rx_big.cl_size = cl_size;
3309 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3310 err = mxge_get_buf_big(&ss->rx_data.rx_big,
3311 ss->rx_data.rx_big.info[i].map, i, TRUE);
3312 if (err) {
3313 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i,
3314 ss->rx_data.rx_big.mask + 1);
3315 return ENOMEM;
3318 return 0;
3321 static int
3322 mxge_open(mxge_softc_t *sc)
3324 struct ifnet *ifp = sc->ifp;
3325 mxge_cmd_t cmd;
3326 int err, slice, cl_size, i;
3327 bus_addr_t bus;
3328 volatile uint8_t *itable;
3329 struct mxge_slice_state *ss;
3331 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3333 /* Copy the MAC address in case it was overridden */
3334 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN);
3336 err = mxge_reset(sc, 1);
3337 if (err != 0) {
3338 if_printf(ifp, "failed to reset\n");
3339 return EIO;
3342 if (sc->num_slices > 1) {
3343 /* Setup the indirection table */
3344 cmd.data0 = sc->num_slices;
3345 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd);
3347 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
3348 if (err != 0) {
3349 if_printf(ifp, "failed to setup rss tables\n");
3350 return err;
3353 /* Just enable an identity mapping */
3354 itable = sc->sram + cmd.data0;
3355 for (i = 0; i < sc->num_slices; i++)
3356 itable[i] = (uint8_t)i;
3358 if (sc->use_rss) {
3359 volatile uint8_t *hwkey;
3360 uint8_t swkey[MXGE_HWRSS_KEYLEN];
3362 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
3363 &cmd);
3364 if (err != 0) {
3365 if_printf(ifp, "failed to get rsskey\n");
3366 return err;
3368 hwkey = sc->sram + cmd.data0;
3370 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN);
3371 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i)
3372 hwkey[i] = swkey[i];
3373 wmb();
3375 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED,
3376 &cmd);
3377 if (err != 0) {
3378 if_printf(ifp, "failed to update rsskey\n");
3379 return err;
3381 if (bootverbose)
3382 if_printf(ifp, "RSS key updated\n");
3385 cmd.data0 = 1;
3386 if (sc->use_rss) {
3387 if (bootverbose)
3388 if_printf(ifp, "input hash: RSS\n");
3389 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 |
3390 MXGEFW_RSS_HASH_TYPE_TCP_IPV4;
3391 } else {
3392 if (bootverbose)
3393 if_printf(ifp, "input hash: SRC_DST_PORT\n");
3394 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
3396 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3397 if (err != 0) {
3398 if_printf(ifp, "failed to enable slices\n");
3399 return err;
3403 cmd.data0 = MXGEFW_TSO_MODE_NDIS;
3404 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd);
3405 if (err) {
3407 * Can't change TSO mode to NDIS; never allow TSO then
3409 if_printf(ifp, "failed to set TSO mode\n");
3410 ifp->if_capenable &= ~IFCAP_TSO;
3411 ifp->if_capabilities &= ~IFCAP_TSO;
3412 ifp->if_hwassist &= ~CSUM_TSO;
3415 mxge_choose_params(ifp->if_mtu, &cl_size);
3417 cmd.data0 = 1;
3418 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd);
3420 * Error is only meaningful if we're trying to set
3421 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1
3425 * Give the firmware the mtu and the big and small buffer
3426 * sizes. The firmware wants the big buf size to be a power
3427 * of two. Luckily, DragonFly's clusters are powers of two
3429 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3430 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3432 cmd.data0 = MXGE_RX_SMALL_BUFLEN;
3433 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
3435 cmd.data0 = cl_size;
3436 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3438 if (err != 0) {
3439 if_printf(ifp, "failed to setup params\n");
3440 goto abort;
3443 /* Now give him the pointer to the stats block */
3444 for (slice = 0; slice < sc->num_slices; slice++) {
3445 ss = &sc->ss[slice];
3446 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3447 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3448 cmd.data2 = sizeof(struct mcp_irq_data);
3449 cmd.data2 |= (slice << 16);
3450 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3453 if (err != 0) {
3454 bus = sc->ss->fw_stats_dma.dmem_busaddr;
3455 bus += offsetof(struct mcp_irq_data, send_done_count);
3456 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3457 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3458 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3459 &cmd);
3461 /* Firmware cannot support multicast without STATS_DMA_V2 */
3462 sc->fw_multicast_support = 0;
3463 } else {
3464 sc->fw_multicast_support = 1;
3467 if (err != 0) {
3468 if_printf(ifp, "failed to setup params\n");
3469 goto abort;
3472 for (slice = 0; slice < sc->num_slices; slice++) {
3473 err = mxge_slice_open(&sc->ss[slice], cl_size);
3474 if (err != 0) {
3475 if_printf(ifp, "couldn't open slice %d\n", slice);
3476 goto abort;
3480 /* Finally, start the firmware running */
3481 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3482 if (err) {
3483 if_printf(ifp, "Couldn't bring up link\n");
3484 goto abort;
3487 ifp->if_flags |= IFF_RUNNING;
3488 for (i = 0; i < sc->num_tx_rings; ++i) {
3489 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3491 ifsq_clr_oactive(tx->ifsq);
3492 ifsq_watchdog_start(&tx->watchdog);
3495 return 0;
3497 abort:
3498 mxge_free_mbufs(sc);
3499 return err;
3502 static void
3503 mxge_close(mxge_softc_t *sc, int down)
3505 struct ifnet *ifp = sc->ifp;
3506 mxge_cmd_t cmd;
3507 int err, old_down_cnt, i;
3509 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3511 if (!down) {
3512 old_down_cnt = sc->down_cnt;
3513 wmb();
3515 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3516 if (err)
3517 if_printf(ifp, "Couldn't bring down link\n");
3519 if (old_down_cnt == sc->down_cnt) {
3521 * Wait for down irq
3522 * XXX racy
3524 ifnet_deserialize_all(ifp);
3525 DELAY(10 * sc->intr_coal_delay);
3526 ifnet_serialize_all(ifp);
3529 wmb();
3530 if (old_down_cnt == sc->down_cnt)
3531 if_printf(ifp, "never got down irq\n");
3533 mxge_free_mbufs(sc);
3535 ifp->if_flags &= ~IFF_RUNNING;
3536 for (i = 0; i < sc->num_tx_rings; ++i) {
3537 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3539 ifsq_clr_oactive(tx->ifsq);
3540 ifsq_watchdog_stop(&tx->watchdog);
3544 static void
3545 mxge_setup_cfg_space(mxge_softc_t *sc)
3547 device_t dev = sc->dev;
3548 int reg;
3549 uint16_t lnk, pectl;
3551 /* Find the PCIe link width and set max read request to 4KB */
3552 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3553 lnk = pci_read_config(dev, reg + 0x12, 2);
3554 sc->link_width = (lnk >> 4) & 0x3f;
3556 if (sc->pectl == 0) {
3557 pectl = pci_read_config(dev, reg + 0x8, 2);
3558 pectl = (pectl & ~0x7000) | (5 << 12);
3559 pci_write_config(dev, reg + 0x8, pectl, 2);
3560 sc->pectl = pectl;
3561 } else {
3562 /* Restore saved pectl after watchdog reset */
3563 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3567 /* Enable DMA and memory space access */
3568 pci_enable_busmaster(dev);
3571 static uint32_t
3572 mxge_read_reboot(mxge_softc_t *sc)
3574 device_t dev = sc->dev;
3575 uint32_t vs;
3577 /* Find the vendor specific offset */
3578 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3579 if_printf(sc->ifp, "could not find vendor specific offset\n");
3580 return (uint32_t)-1;
3582 /* Enable read32 mode */
3583 pci_write_config(dev, vs + 0x10, 0x3, 1);
3584 /* Tell NIC which register to read */
3585 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3586 return pci_read_config(dev, vs + 0x14, 4);
3589 static void
3590 mxge_watchdog_reset(mxge_softc_t *sc)
3592 struct pci_devinfo *dinfo;
3593 int err, running;
3594 uint32_t reboot;
3595 uint16_t cmd;
3597 err = ENXIO;
3599 if_printf(sc->ifp, "Watchdog reset!\n");
3602 * Check to see if the NIC rebooted. If it did, then all of
3603 * PCI config space has been reset, and things like the
3604 * busmaster bit will be zero. If this is the case, then we
3605 * must restore PCI config space before the NIC can be used
3606 * again
3608 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3609 if (cmd == 0xffff) {
3611 * Maybe the watchdog caught the NIC rebooting; wait
3612 * up to 100ms for it to finish. If it does not come
3613 * back, then give up
3615 DELAY(1000*100);
3616 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3617 if (cmd == 0xffff)
3618 if_printf(sc->ifp, "NIC disappeared!\n");
3620 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3621 /* Print the reboot status */
3622 reboot = mxge_read_reboot(sc);
3623 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);
3625 running = sc->ifp->if_flags & IFF_RUNNING;
3626 if (running) {
3628 * Quiesce NIC so that TX routines will not try to
3629 * xmit after restoration of BAR
3632 /* Mark the link as down */
3633 if (sc->link_state) {
3634 sc->ifp->if_link_state = LINK_STATE_DOWN;
3635 if_link_state_change(sc->ifp);
3637 mxge_close(sc, 1);
3639 /* Restore PCI configuration space */
3640 dinfo = device_get_ivars(sc->dev);
3641 pci_cfg_restore(sc->dev, dinfo);
3643 /* And redo any changes we made to our config space */
3644 mxge_setup_cfg_space(sc);
3646 /* Reload f/w */
3647 err = mxge_load_firmware(sc, 0);
3648 if (err)
3649 if_printf(sc->ifp, "Unable to re-load f/w\n");
3650 if (running && !err) {
3651 int i;
3653 err = mxge_open(sc);
3655 for (i = 0; i < sc->num_tx_rings; ++i)
3656 ifsq_devstart_sched(sc->ss[i].tx.ifsq);
3658 sc->watchdog_resets++;
3659 } else {
3660 if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
3661 err = 0;
3663 if (err) {
3664 if_printf(sc->ifp, "watchdog reset failed\n");
3665 } else {
3666 if (sc->dying == 2)
3667 sc->dying = 0;
3668 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3672 static void
3673 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3675 if_printf(sc->ifp, "slice %d struck? ring state:\n", slice);
3676 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3677 tx->req, tx->done, tx->queue_active);
3678 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
3679 tx->activate, tx->deactivate);
3680 if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
3681 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
3684 static u_long
3685 mxge_update_stats(mxge_softc_t *sc)
3687 u_long ipackets, opackets, pkts;
3689 IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
3690 IFNET_STAT_GET(sc->ifp, opackets, opackets);
3692 pkts = ipackets - sc->ipackets;
3693 pkts += opackets - sc->opackets;
3695 sc->ipackets = ipackets;
3696 sc->opackets = opackets;
3698 return pkts;
3701 static void
3702 mxge_tick(void *arg)
3704 mxge_softc_t *sc = arg;
3705 u_long pkts = 0;
3706 int err = 0;
3707 int ticks;
3709 lwkt_serialize_enter(&sc->main_serialize);
3711 ticks = mxge_ticks;
3712 if (sc->ifp->if_flags & IFF_RUNNING) {
3713 /* Aggregate stats from different slices */
3714 pkts = mxge_update_stats(sc);
3715 if (sc->need_media_probe)
3716 mxge_media_probe(sc);
3718 if (pkts == 0) {
3719 uint16_t cmd;
3721 /* Ensure NIC did not suffer h/w fault while idle */
3722 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3723 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3724 sc->dying = 2;
3725 mxge_serialize_skipmain(sc);
3726 mxge_watchdog_reset(sc);
3727 mxge_deserialize_skipmain(sc);
3728 err = ENXIO;
3731 /* Look less often if NIC is idle */
3732 ticks *= 4;
3735 if (err == 0)
3736 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3738 lwkt_serialize_exit(&sc->main_serialize);
3741 static int
3742 mxge_media_change(struct ifnet *ifp)
3744 mxge_softc_t *sc = ifp->if_softc;
3745 const struct ifmedia *ifm = &sc->media;
3746 int pause;
3748 if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
3749 if (sc->pause)
3750 return 0;
3751 pause = 1;
3752 } else {
3753 if (!sc->pause)
3754 return 0;
3755 pause = 0;
3757 return mxge_change_pause(sc, pause);
3760 static int
3761 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3763 struct ifnet *ifp = sc->ifp;
3764 int real_mtu, old_mtu;
3765 int err = 0;
3767 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3768 if (mtu > sc->max_mtu || real_mtu < 60)
3769 return EINVAL;
3771 old_mtu = ifp->if_mtu;
3772 ifp->if_mtu = mtu;
3773 if (ifp->if_flags & IFF_RUNNING) {
3774 mxge_close(sc, 0);
3775 err = mxge_open(sc);
3776 if (err != 0) {
3777 ifp->if_mtu = old_mtu;
3778 mxge_close(sc, 0);
3779 mxge_open(sc);
3782 return err;
3785 static void
3786 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3788 mxge_softc_t *sc = ifp->if_softc;
3790 ifmr->ifm_status = IFM_AVALID;
3791 ifmr->ifm_active = IFM_ETHER;
3793 if (sc->link_state)
3794 ifmr->ifm_status |= IFM_ACTIVE;
3797 * Autoselect is not supported, so the current media
3798 * should be delivered.
3800 ifmr->ifm_active |= sc->current_media;
3801 if (sc->current_media != IFM_NONE) {
3802 ifmr->ifm_active |= MXGE_IFM;
3803 if (sc->pause)
3804 ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
3808 static int
3809 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data,
3810 struct ucred *cr __unused)
3812 mxge_softc_t *sc = ifp->if_softc;
3813 struct ifreq *ifr = (struct ifreq *)data;
3814 int err, mask;
3816 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3817 err = 0;
3819 switch (command) {
3820 case SIOCSIFMTU:
3821 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3822 break;
3824 case SIOCSIFFLAGS:
3825 if (sc->dying)
3826 return EINVAL;
3828 if (ifp->if_flags & IFF_UP) {
3829 if (!(ifp->if_flags & IFF_RUNNING)) {
3830 err = mxge_open(sc);
3831 } else {
3833 * Take care of PROMISC and ALLMULTI
3834 * flag changes
3836 mxge_change_promisc(sc,
3837 ifp->if_flags & IFF_PROMISC);
3838 mxge_set_multicast_list(sc);
3840 } else {
3841 if (ifp->if_flags & IFF_RUNNING)
3842 mxge_close(sc, 0);
3844 break;
3846 case SIOCADDMULTI:
3847 case SIOCDELMULTI:
3848 mxge_set_multicast_list(sc);
3849 break;
3851 case SIOCSIFCAP:
3852 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3853 if (mask & IFCAP_TXCSUM) {
3854 ifp->if_capenable ^= IFCAP_TXCSUM;
3855 if (ifp->if_capenable & IFCAP_TXCSUM)
3856 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
3857 else
3858 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
3860 if (mask & IFCAP_TSO) {
3861 ifp->if_capenable ^= IFCAP_TSO;
3862 if (ifp->if_capenable & IFCAP_TSO)
3863 ifp->if_hwassist |= CSUM_TSO;
3864 else
3865 ifp->if_hwassist &= ~CSUM_TSO;
3867 if (mask & IFCAP_RXCSUM)
3868 ifp->if_capenable ^= IFCAP_RXCSUM;
3869 if (mask & IFCAP_VLAN_HWTAGGING)
3870 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3871 break;
3873 case SIOCGIFMEDIA:
3874 case SIOCSIFMEDIA:
3875 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3876 &sc->media, command);
3877 break;
3879 default:
3880 err = ether_ioctl(ifp, command, data);
3881 break;
3883 return err;
3886 static void
3887 mxge_fetch_tunables(mxge_softc_t *sc)
3889 int ifm;
3891 sc->intr_coal_delay = mxge_intr_coal_delay;
3892 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000))
3893 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY;
3895 /* XXX */
3896 if (mxge_ticks == 0)
3897 mxge_ticks = hz / 2;
3899 ifm = ifmedia_str2ethfc(mxge_flowctrl);
3900 if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE))
3901 sc->pause = 1;
3903 sc->use_rss = mxge_use_rss;
3905 sc->throttle = mxge_throttle;
3906 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE)
3907 sc->throttle = MXGE_MAX_THROTTLE;
3908 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE)
3909 sc->throttle = MXGE_MIN_THROTTLE;
3912 static void
3913 mxge_free_slices(mxge_softc_t *sc)
3915 struct mxge_slice_state *ss;
3916 int i;
3918 if (sc->ss == NULL)
3919 return;
3921 for (i = 0; i < sc->num_slices; i++) {
3922 ss = &sc->ss[i];
3923 if (ss->fw_stats != NULL) {
3924 mxge_dma_free(&ss->fw_stats_dma);
3925 ss->fw_stats = NULL;
3927 if (ss->rx_data.rx_done.entry != NULL) {
3928 mxge_dma_free(&ss->rx_done_dma);
3929 ss->rx_data.rx_done.entry = NULL;
3932 kfree(sc->ss, M_DEVBUF);
3933 sc->ss = NULL;
3936 static int
3937 mxge_alloc_slices(mxge_softc_t *sc)
3939 mxge_cmd_t cmd;
3940 struct mxge_slice_state *ss;
3941 size_t bytes;
3942 int err, i, rx_ring_size;
3944 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3945 if (err != 0) {
3946 device_printf(sc->dev, "Cannot determine rx ring size\n");
3947 return err;
3949 rx_ring_size = cmd.data0;
3950 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t));
3952 bytes = sizeof(*sc->ss) * sc->num_slices;
3953 sc->ss = kmalloc_cachealign(bytes, M_DEVBUF, M_WAITOK | M_ZERO);
3955 for (i = 0; i < sc->num_slices; i++) {
3956 ss = &sc->ss[i];
3958 ss->sc = sc;
3960 lwkt_serialize_init(&ss->rx_data.rx_serialize);
3961 lwkt_serialize_init(&ss->tx.tx_serialize);
3962 ss->intr_rid = -1;
3965 * Allocate per-slice rx interrupt queue
3966 * XXX assumes 4-byte mcp_slot
3968 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
3969 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
3970 if (err != 0) {
3971 device_printf(sc->dev,
3972 "alloc %d slice rx_done failed\n", i);
3973 return err;
3975 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;
3978 * Allocate the per-slice firmware stats
3980 bytes = sizeof(*ss->fw_stats);
3981 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3982 sizeof(*ss->fw_stats), 64);
3983 if (err != 0) {
3984 device_printf(sc->dev,
3985 "alloc %d fw_stats failed\n", i);
3986 return err;
3988 ss->fw_stats = ss->fw_stats_dma.dmem_addr;
3990 return 0;
3993 static void
3994 mxge_slice_probe(mxge_softc_t *sc)
3996 int status, max_intr_slots, max_slices, num_slices;
3997 int msix_cnt, msix_enable, i, multi_tx;
3998 mxge_cmd_t cmd;
3999 const char *old_fw;
4001 sc->num_slices = 1;
4002 sc->num_tx_rings = 1;
4004 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
4005 if (num_slices == 1)
4006 return;
4008 if (ncpus2 == 1)
4009 return;
4011 msix_enable = device_getenv_int(sc->dev, "msix.enable",
4012 mxge_msix_enable);
4013 if (!msix_enable)
4014 return;
4016 msix_cnt = pci_msix_count(sc->dev);
4017 if (msix_cnt < 2)
4018 return;
4021 * Round down MSI-X vector count to the nearest power of 2
4023 i = 0;
4024 while ((1 << (i + 1)) <= msix_cnt)
4025 ++i;
4026 msix_cnt = 1 << i;
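/*
 * Editor's example: a card advertising 5 MSI-X vectors rounds down
 * to 4 here; i grows while (1 << (i + 1)) <= 5 and stops at i = 2.
 */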
4029 * Now load the slice aware firmware see what it supports
4031 old_fw = sc->fw_name;
4032 if (old_fw == mxge_fw_aligned)
4033 sc->fw_name = mxge_fw_rss_aligned;
4034 else
4035 sc->fw_name = mxge_fw_rss_unaligned;
4036 status = mxge_load_firmware(sc, 0);
4037 if (status != 0) {
4038 device_printf(sc->dev, "Falling back to a single slice\n");
4039 return;
4043 * Try to send a reset command to the card to see if it is alive
4045 memset(&cmd, 0, sizeof(cmd));
4046 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4047 if (status != 0) {
4048 device_printf(sc->dev, "failed reset\n");
4049 goto abort_with_fw;
4053 * Get rx ring size to calculate rx interrupt queue size
4055 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4056 if (status != 0) {
4057 device_printf(sc->dev, "Cannot determine rx ring size\n");
4058 goto abort_with_fw;
4060 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));
4063 * Tell it the size of the rx interrupt queue
4065 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
4066 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4067 if (status != 0) {
4068 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4069 goto abort_with_fw;
4073 * Ask for the maximum number of slices it supports
4075 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4076 if (status != 0) {
4077 device_printf(sc->dev,
4078 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4079 goto abort_with_fw;
4081 max_slices = cmd.data0;
4084 * Round down max slices count to the nearest power of 2
4086 i = 0;
4087 while ((1 << (i + 1)) <= max_slices)
4088 ++i;
4089 max_slices = 1 << i;
4091 if (max_slices > msix_cnt)
4092 max_slices = msix_cnt;
4094 sc->num_slices = num_slices;
4095 sc->num_slices = if_ring_count2(sc->num_slices, max_slices);
4097 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
4098 if (multi_tx)
4099 sc->num_tx_rings = sc->num_slices;
4101 if (bootverbose) {
4102 device_printf(sc->dev, "using %d slices, max %d\n",
4103 sc->num_slices, max_slices);
4106 if (sc->num_slices == 1)
4107 goto abort_with_fw;
4108 return;
4110 abort_with_fw:
4111 sc->fw_name = old_fw;
4112 mxge_load_firmware(sc, 0);
4115 static void
4116 mxge_setup_serialize(struct mxge_softc *sc)
4118 int i = 0, slice;
4120 /* Main + rx + tx */
4121 sc->nserialize = (2 * sc->num_slices) + 1;
4122 sc->serializes =
4123 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
4124 M_DEVBUF, M_WAITOK | M_ZERO);
4127 * Setup serializes
4129 * NOTE: Order is critical
4132 KKASSERT(i < sc->nserialize);
4133 sc->serializes[i++] = &sc->main_serialize;
4135 for (slice = 0; slice < sc->num_slices; ++slice) {
4136 KKASSERT(i < sc->nserialize);
4137 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
4140 for (slice = 0; slice < sc->num_slices; ++slice) {
4141 KKASSERT(i < sc->nserialize);
4142 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
4145 KKASSERT(i == sc->nserialize);
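/*
 * Editor's example: with num_slices == 2 the array built above is
 * { main, rx0, rx1, tx0, tx1 } (nserialize == 5); the
 * mxge_serialize_skipmain() helpers enter and exit everything past
 * index 0.
 */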
4148 static void
4149 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4151 struct mxge_softc *sc = ifp->if_softc;
4153 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
4156 static void
4157 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4159 struct mxge_softc *sc = ifp->if_softc;
4161 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
4164 static int
4165 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4167 struct mxge_softc *sc = ifp->if_softc;
4169 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
4172 #ifdef INVARIANTS
4174 static void
4175 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4176 boolean_t serialized)
4178 struct mxge_softc *sc = ifp->if_softc;
4180 ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
4181 slz, serialized);
4184 #endif /* INVARIANTS */
4186 #ifdef IFPOLL_ENABLE
4188 static void
4189 mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
4191 struct mxge_slice_state *ss = xss;
4192 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
4194 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
4196 if (rx_done->entry[rx_done->idx].length != 0) {
4197 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
4198 } else {
4200 * XXX
4201 * This register write obviously has a cost;
4202 * however, if we don't hand back the rx token,
4203 * upcoming packets may suffer ridiculously
4204 * large delays, as observed on 8AL-C using ping(8).
4206 *ss->irq_claim = be32toh(3);
static void
mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct mxge_softc *sc = ifp->if_softc;
	int i;

	if (info == NULL)
		return;

	/*
	 * Only poll rx; polling tx and status doesn't seem to work
	 */
	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int idx = ss->intr_cpuid;

		KKASSERT(idx < ncpus2);
		info->ifpi_rx[idx].poll_func = mxge_npoll_rx;
		info->ifpi_rx[idx].arg = ss;
		info->ifpi_rx[idx].serializer = &ss->rx_data.rx_serialize;
	}
}

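/*
 * NOTE: each slice's poll handler is registered on ss->intr_cpuid, the
 * same cpu its MSI-X vector is bound to in mxge_alloc_msix(), so rx
 * processing for a ring stays on one cpu whether the device is polled
 * or interrupt driven.
 */
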
#endif /* IFPOLL_ENABLE */

static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid, i;

	/*
	 * Avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/* IFM_ETH_FORCEPAUSE can't be changed */
	ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE,
	    mxge_media_change, mxge_media_status);

	lwkt_serialize_init(&sc->main_serialize);

	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,		/* parent */
	    1,					/* alignment */
	    0,					/* boundary */
	    BUS_SPACE_MAXADDR,			/* low */
	    BUS_SPACE_MAXADDR,			/* high */
	    NULL, NULL,				/* filter */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsize */
	    0,					/* num segs */
	    BUS_SPACE_MAXSIZE_32BIT,		/* maxsegsize */
	    0,					/* flags */
	    &sc->parent_dmat);			/* tag */
	if (err != 0) {
		device_printf(dev, "Err %d allocating parent dmat\n", err);
		goto failed;
	}

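	/*
	 * The parent tag is deliberately permissive (byte alignment, no
	 * boundary, full 32-bit sizes); the per-ring and per-buffer DMA
	 * tags created elsewhere in the driver are presumably derived
	 * from it and impose their own, tighter restrictions.
	 */
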
	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/*
	 * Map the board into the kernel
	 */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto failed;
	}

	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto failed;
	}

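	/*
	 * NOTE: the sram_size arithmetic above assumes a 2MB BAR from
	 * which some on-NIC regions (apparently two 48KB areas and one
	 * 32KB area, plus a 0x100 byte tail) are carved out; the exact
	 * layout is defined by Myricom's firmware, not by this driver.
	 */
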
	/*
	 * Make a NUL-terminated copy of the EEPROM strings section of
	 * lanai SRAM
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);

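	/*
	 * Only MXGE_EEPROM_STRINGS_SIZE - 2 bytes are read, so after
	 * the bzero() above the copy always ends in at least two NUL
	 * bytes, keeping mxge_parse_strings() from running off the end.
	 */
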
	err = mxge_parse_strings(sc);
	if (err != 0) {
		device_printf(dev, "parse EEPROM string failed\n");
		goto failed;
	}

	/*
	 * Enable write combining for efficient use of PCIe bus
	 */
	mxge_enable_wc(sc);

	/*
	 * Allocate the out-of-band DMA memory
	 */
	err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
	if (err != 0) {
		device_printf(dev, "alloc cmd DMA buf failed\n");
		goto failed;
	}
	sc->cmd = sc->cmd_dma.dmem_addr;

	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0) {
		device_printf(dev, "alloc zeropad DMA buf failed\n");
		goto failed;
	}

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0) {
		device_printf(dev, "alloc dmabench DMA buf failed\n");
		goto failed;
	}

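	/*
	 * The three buffers above are firmware side-channels: cmd_dma
	 * carries mxge_send_cmd() mailbox responses, zeropad_dma gives
	 * the NIC a source of zeros for padding short frames, and
	 * dmabench_dma is a scratch page the DMA benchmark runs
	 * against.  (Roles inferred from their users elsewhere in this
	 * file.)
	 */
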
	/* Select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0) {
		device_printf(dev, "select firmware failed\n");
		goto failed;
	}

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0) {
		device_printf(dev, "alloc slices failed\n");
		goto failed;
	}

	err = mxge_alloc_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc intr failed\n");
		goto failed;
	}

	/* Setup serializes */
	mxge_setup_serialize(sc);

	err = mxge_reset(sc, 0);
	if (err != 0) {
		device_printf(dev, "reset failed\n");
		goto failed;
	}

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(dev, "failed to allocate rings\n");
		goto failed;
	}

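	/*
	 * NOTE: the ordering above appears to matter: firmware
	 * selection decides the slice count, slices must exist before
	 * interrupt vectors are bound to them, and mxge_reset()
	 * presumably reprograms the per-slice interrupt queues, so it
	 * runs only after both.
	 */
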
	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;

	ifp->if_capabilities |= IFCAP_VLAN_MTU;
#if 0
	/* Well, it's software, sigh */
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
#endif
	ifp->if_capenable = ifp->if_capabilities;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = mxge_init;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
#ifdef IFPOLL_ENABLE
	if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
		ifp->if_npoll = mxge_npoll;
#endif
	ifp->if_serialize = mxge_serialize;
	ifp->if_deserialize = mxge_deserialize;
	ifp->if_tryserialize = mxge_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = mxge_serialize_assert;
#endif

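	/*
	 * NOTE: if_npoll is only offered on MSI/MSI-X above; on a
	 * legacy INTx line the interrupt may be shared, presumably
	 * making it unsafe to leave the device's interrupt path
	 * disabled while polling.
	 */
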
	/* Increase TSO burst length */
	ifp->if_tsolen = (32 * ETHERMTU);

	/* Initialise the ifmedia structure */
	mxge_media_init(sc);
	mxge_media_probe(sc);

	ether_ifattach(ifp, sc->mac_addr, NULL);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->num_tx_rings; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct mxge_slice_state *ss = &sc->ss[i];

		ifsq_set_cpuid(ifsq, ss->intr_cpuid);
		ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
		ifsq_set_priv(ifsq, &ss->tx);
		ss->tx.ifsq = ifsq;

		ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog);
	}

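	/*
	 * Pinning each subqueue to its slice's interrupt cpu means the
	 * if_start path and the tx completion interrupt for a ring run
	 * on the same cpu, avoiding cross-cpu contention on
	 * tx_serialize.
	 */
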
	/*
	 * XXX
	 * We are not ready to do "gather" jumbo frames, so
	 * limit MTU to MJUMPAGESIZE
	 */
	sc->max_mtu = MJUMPAGESIZE -
	    ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
	sc->dying = 0;

	err = mxge_setup_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc and setup intr failed\n");
		ether_ifdetach(ifp);
		goto failed;
	}

	mxge_add_sysctls(sc);

	/* Increase non-cluster mbuf limit; used by small RX rings */
	mb_inclimit(ifp->if_nmbclusters);

	callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
	    sc->ss[0].intr_cpuid);
	return 0;

failed:
	mxge_detach(dev);
	return err;
}

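/*
 * The failed: path above simply calls mxge_detach(), which works on a
 * partially initialized softc: the resource releases below are guarded
 * by NULL (or equivalent) checks, and device_is_attached() keeps the
 * ifnet teardown from running when attach never completed.
 */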
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = sc->ifp;
		int mblimit = ifp->if_nmbclusters;

		ifnet_serialize_all(ifp);

		sc->dying = 1;
		if (ifp->if_flags & IFF_RUNNING)
			mxge_close(sc, 1);
		callout_stop(&sc->co_hdl);

		mxge_teardown_intr(sc, sc->num_slices);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->co_hdl);

		ether_ifdetach(ifp);

		/* Decrease non-cluster mbuf limit increased by us */
		mb_inclimit(-mblimit);
	}

	ifmedia_removeall(&sc->media);

	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
	    sc->sram != NULL)
		mxge_dummy_rdma(sc, 0);

	mxge_free_intr(sc);
	mxge_rem_sysctls(sc);
	mxge_free_rings(sc);

	/* MUST be called after sysctls, intr and rings are freed */
	mxge_free_slices(sc);

	if (sc->dmabench_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->dmabench_dma);
	if (sc->zeropad_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->zeropad_dma);
	if (sc->cmd_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->cmd_dma);

	if (sc->msix_table_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
		    sc->msix_table_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
		    sc->mem_res);
	}

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

static void
mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->num_slices > 1);

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (ss->intr_rid >= 0)
			pci_release_msix_vector(sc->dev, ss->intr_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);
}

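/*
 * MSI-X vectors are spread over cpus starting at a configurable offset.
 * Example: with num_slices == 4 and ncpus2 == 8, unit 1 defaults to
 * offset (4 * 1) % 8 == 4, so its slices land on cpu4..cpu7 while
 * unit 0 uses cpu0..cpu3.  The msix.offset tunable overrides this but
 * must stay aligned to the slice count.
 */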
static int
mxge_alloc_msix(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	int offset, rid, error, i;
	boolean_t setup = FALSE;

	KKASSERT(sc->num_slices > 1);

	if (sc->num_slices == ncpus2) {
		offset = 0;
	} else {
		int offset_def;

		offset_def = (sc->num_slices * device_get_unit(sc->dev)) %
		    ncpus2;

		offset = device_getenv_int(sc->dev, "msix.offset", offset_def);
		if (offset >= ncpus2 ||
		    offset % sc->num_slices != 0) {
			device_printf(sc->dev, "invalid msix.offset %d, "
			    "use %d\n", offset, offset_def);
			offset = offset_def;
		}
	}

	ss = &sc->ss[0];

	ss->intr_serialize = &sc->main_serialize;
	ss->intr_func = mxge_msi;
	ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
	    "%s comb", device_get_nameunit(sc->dev));
	ss->intr_desc = ss->intr_desc0;
	ss->intr_cpuid = offset;

	for (i = 1; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		ss->intr_serialize = &ss->rx_data.rx_serialize;
		if (sc->num_tx_rings == 1) {
			ss->intr_func = mxge_msix_rx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rx", device_get_nameunit(sc->dev));
		} else {
			ss->intr_func = mxge_msix_rxtx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rxtx", device_get_nameunit(sc->dev));
		}
		ss->intr_desc = ss->intr_desc0;
		ss->intr_cpuid = offset + i;
	}

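	/*
	 * Slice 0's vector is described as "comb" (combined): it runs
	 * under main_serialize and apparently also covers the shared
	 * status/command work, while slices 1..N-1 only service their
	 * own rx (or rx+tx) rings.
	 */
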
	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
		return ENXIO;
	}

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "could not setup MSI-X\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
		    ss->intr_cpuid);
		if (error) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
			goto back;
		}

		ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &ss->intr_rid, RF_ACTIVE);
		if (ss->intr_res == NULL) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d resource\n", i);
			error = ENXIO;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		mxge_free_msix(sc, setup);
	return error;
}

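/*
 * Pick the interrupt flavor: multiple slices require MSI-X (one vector
 * per slice), while the single-slice case falls back to
 * pci_alloc_1intr(), which hands out MSI or a legacy INTx line
 * depending on the mxge_msi_enable tunable and hardware support.
 */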
static int
mxge_alloc_intr(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	u_int irq_flags;

	if (sc->num_slices > 1) {
		int error;

		error = mxge_alloc_msix(sc);
		if (error)
			return error;
		KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
		return 0;
	}

	ss = &sc->ss[0];

	sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
	    &ss->intr_rid, &irq_flags);

	ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &ss->intr_rid, irq_flags);
	if (ss->intr_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
		ss->intr_func = mxge_legacy;
	else
		ss->intr_func = mxge_msi;
	ss->intr_serialize = &sc->main_serialize;
	ss->intr_cpuid = rman_get_cpuid(ss->intr_res);

	return 0;
}

static int
mxge_setup_intr(struct mxge_softc *sc)
{
	int i;

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, ss->intr_res,
		    INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
		    ss->intr_serialize, ss->intr_desc);
		if (error) {
			device_printf(sc->dev, "can't setup %dth intr\n", i);
			mxge_teardown_intr(sc, i);
			return error;
		}
	}
	return 0;
}

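/*
 * NOTE: on a partial failure mxge_setup_intr() passes the index of the
 * failing slice as the count here, so only the handlers that were
 * actually installed get torn down.
 */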
static void
mxge_teardown_intr(struct mxge_softc *sc, int cnt)
{
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < cnt; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
	}
}

static void
mxge_free_intr(struct mxge_softc *sc)
{
	if (sc->ss == NULL)
		return;

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		struct mxge_slice_state *ss = &sc->ss[0];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		mxge_free_msix(sc, TRUE);