/******************************************************************************

Copyright (c) 2006-2013, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

$FreeBSD: head/sys/dev/mxge/if_mxge.c 254263 2013-08-12 23:30:01Z scottl $

***************************************************************************/
#include "opt_ifpoll.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/if_ringmap.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_poll.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>
#include <net/toeplitz.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h>	/* XXX for pci_cfg_restore */

#include <vm/vm.h>			/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__x86_64__)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
#include <dev/netif/mxge/if_mxge_var.h>
#define MXGE_IFM	(IFM_ETHER | IFM_FDX | IFM_ETH_FORCEPAUSE)

#define MXGE_RX_SMALL_BUFLEN	(MHLEN - MXGEFW_PAD)
#define MXGE_HWRSS_KEYLEN	16
/* Tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = MXGE_INTR_COAL_DELAY;
static int mxge_deassert_wait = 1;
static int mxge_ticks;
static int mxge_num_slices = 0;
static int mxge_always_promisc = 0;
static int mxge_throttle = 0;
static int mxge_msi_enable = 1;
static int mxge_msix_enable = 1;
static int mxge_multi_tx = 1;
/*
 * Don't use RSS by default, it's just too slow
 */
static int mxge_use_rss = 0;

static char mxge_flowctrl[IFM_ETH_FC_STRLEN] = IFM_ETH_FC_FORCE_NONE;

static const char *mxge_fw_unaligned = "mxge_ethp_z8e";
static const char *mxge_fw_aligned = "mxge_eth_z8e";
static const char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static const char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
TUNABLE_INT("hw.mxge.num_slices", &mxge_num_slices);
TUNABLE_INT("hw.mxge.intr_coal_delay", &mxge_intr_coal_delay);
TUNABLE_INT("hw.mxge.nvidia_ecrc_enable", &mxge_nvidia_ecrc_enable);
TUNABLE_INT("hw.mxge.force_firmware", &mxge_force_firmware);
TUNABLE_INT("hw.mxge.deassert_wait", &mxge_deassert_wait);
TUNABLE_INT("hw.mxge.ticks", &mxge_ticks);
TUNABLE_INT("hw.mxge.always_promisc", &mxge_always_promisc);
TUNABLE_INT("hw.mxge.throttle", &mxge_throttle);
TUNABLE_INT("hw.mxge.multi_tx", &mxge_multi_tx);
TUNABLE_INT("hw.mxge.use_rss", &mxge_use_rss);
TUNABLE_INT("hw.mxge.msi.enable", &mxge_msi_enable);
TUNABLE_INT("hw.mxge.msix.enable", &mxge_msix_enable);
TUNABLE_STR("hw.mxge.flow_ctrl", mxge_flowctrl, sizeof(mxge_flowctrl));
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);

static int mxge_alloc_intr(struct mxge_softc *sc);
static void mxge_free_intr(struct mxge_softc *sc);
static int mxge_setup_intr(struct mxge_softc *sc);
static void mxge_teardown_intr(struct mxge_softc *sc, int cnt);

static device_method_t mxge_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	DEVMETHOD_END
};

static driver_t mxge_driver = {
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus. */
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, NULL, NULL);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static void mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static void mxge_watchdog_reset(mxge_softc_t *sc);
static void mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice);
static int
mxge_probe(device_t dev)
{
	if (pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM &&
	    (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E ||
	     pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9)) {
		int rev = pci_get_revid(dev);

		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n", rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__x86_64__)
	vm_offset_t len;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	pmap_change_attr((vm_offset_t)sc->sram, len / PAGE_SIZE,
	    PAT_WRITE_COMBINING);
#endif
}
static int
mxge_dma_alloc(mxge_softc_t *sc, bus_dmamem_t *dma, size_t bytes,
    bus_size_t alignment)
{
	bus_size_t boundary;
	int err;

	if (bytes > 4096 && alignment == 4096)
		boundary = 0;
	else
		boundary = 4096;

	err = bus_dmamem_coherent(sc->parent_dmat, alignment, boundary,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, bytes,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO, dma);
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamem_coherent failed: %d\n", err);
		return err;
	}
	return 0;
}

static void
mxge_dma_free(bus_dmamem_t *dma)
{
	bus_dmamap_unload(dma->dmem_tag, dma->dmem_map);
	bus_dmamem_free(dma->dmem_tag, dma->dmem_addr, dma->dmem_map);
	bus_dma_tag_destroy(dma->dmem_tag);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
	const char *ptr;
	int i, found_mac, found_sn2;
	char *endptr;

	ptr = sc->eeprom_strings;
	found_mac = 0;
	found_sn2 = 0;
	while (*ptr != '\0') {
		if (strncmp(ptr, "MAC=", 4) == 0) {
			ptr += 4;
			for (i = 0;;) {
				sc->mac_addr[i] = strtoul(ptr, &endptr, 16);
				if (endptr - ptr != 2)
					goto abort;
				ptr = endptr;
				if (++i == 6)
					break;
				if (*ptr++ != ':')
					goto abort;
			}
			found_mac = 1;
		} else if (strncmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strlcpy(sc->product_code_string, ptr,
			    sizeof(sc->product_code_string));
		} else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) {
			ptr += 3;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		} else if (strncmp(ptr, "SN2=", 4) == 0) {
			/* SN2 takes precedence over SN */
			ptr += 4;
			found_sn2 = 1;
			strlcpy(sc->serial_number_string, ptr,
			    sizeof(sc->serial_number_string));
		}
		while (*ptr++ != '\0') {}
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");
	return ENXIO;
}
#if defined(__x86_64__)

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/*
	 * XXXX
	 * Test below is commented because it is believed that doing
	 * config read/write beyond 0xff will access the config space
	 * for the next larger function. Uncomment this and remove
	 * the hacky pmap_mapdev() way of accessing config space when
	 * DragonFly grows support for extended pcie config space access.
	 */
#if 0
	/*
	 * See if we can, by some miracle, access the extended
	 * config space
	 */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/*
	 * Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
	    PCI_IVAR_DEVICE, &idev);

	off = base + 0x00100000UL * (unsigned long)bus +
	    0x00001000UL * (unsigned long)(func + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (!(vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
		    vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t *)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (bootverbose) {
		device_printf(sc->dev, "Enabled ECRC on upstream "
		    "Nvidia bridge at %d:%d:%d\n",
		    (int)bus, (int)slot, (int)func);
	}
}

#else	/* __x86_64__ */

static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev, "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
}

#endif
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.dmem_busaddr;
	int status;
	uint32_t len;
	const char *test = " ";

	/*
	 * Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests. The
	 * results are returned in cmd.data0. The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
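	/*
	 * Note: the expressions below convert those raw counters to MB/s.
	 * (transfers * len) bytes moved in (ticks * 0.5) us works out to
	 * (transfers * len * 2) / ticks bytes/us, and one byte/us is
	 * roughly one MB/s. The read/write test moves data in both
	 * directions per transfer, hence its extra factor of 2.
	 */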
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0 >> 16) * len * 2 * 2) /
	    (cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) {
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
		    test, status);
	}
	return status;
}
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary. Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;

	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
			    pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * Load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0)
		return status;

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen. Not required on Z8ES or newer.
	 */
	if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES)
		return 0;

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS) {
		device_printf(dev, "Falling back to ethp! "
		    "Please install up to date fw\n");
	}
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (bootverbose) {
			device_printf(sc->dev,
			    "Assuming %s completions (forced)\n",
			    aligned ? "aligned" : "unaligned");
		}
		goto abort;
	}

	/*
	 * If the PCIe link width is 4 or less, we can use the aligned
	 * firmware and skip any checks
	 */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev, "PCIe x%d Link, "
		    "expect reduced performance\n", sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (mxge_firmware_probe(sc) == 0)
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return mxge_load_firmware(sc, 0);
}
static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{
	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		if_printf(sc->ifp, "Bad firmware type: 0x%x\n",
		    be32toh(hdr->mcp_type));
		return EIO;
	}

	/* Save firmware version for sysctl */
	strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version));
	if (bootverbose)
		if_printf(sc->ifp, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	    &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR &&
	      sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		if_printf(sc->ifp, "Found firmware version %s\n",
		    sc->fw_version);
		if_printf(sc->ifp, "Driver needs %d.%d\n",
		    MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	return kmalloc(items * size, M_TEMP, M_WAITOK);
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		if_printf(sc->ifp, "Could not find firmware image %s\n",
		    sc->fw_name);
		return ENOENT;
	}

	/* Setup zlib and decompress f/w */
	bzero(&zs, sizeof(zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/*
	 * The uncompressed size is stored as the firmware version,
	 * which would otherwise go unused
	 */
	fw_len = (size_t)fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_WAITOK);
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		if_printf(sc->ifp, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* Check id */
	hdr_offset =
	    htobe32(*(const uint32_t *)(inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		if_printf(sc->ifp, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void *)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, inflate_buffer + i,
		    min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;
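
	/*
	 * buf_bytes is over-sized so the 64-byte command block can be
	 * rounded up to the next 8-byte boundary below; the same
	 * alignment trick is used in mxge_send_cmd().
	 */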
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* Clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/*
	 * Send an rdma command to the PCIe engine, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.dmem_busaddr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		if_printf(sc->ifp, "dummy rdma %s failed (%p = 0x%x)",
		    (enable ? "enable" : "disable"), confirm, *confirm);
	}
}
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* Ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);

	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof(*buf));

	/*
	 * Wait up to 20ms
	 */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		case MXGEFW_CMD_ERROR_I2C_ABSENT:
			err = ENXIO;
			break;
		default:
			if_printf(sc->ifp, "command %d failed, result = %d\n",
			    cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN) {
		if_printf(sc->ifp, "command %d timed out result = %d\n",
		    cmd, be32toh(response->result));
	}
	return err;
}
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof(struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/*
	 * Find running firmware header
	 */
	hdr_offset =
	    htobe32(*(volatile uint32_t *)(sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		if_printf(sc->ifp, "Running firmware has bad header offset "
		    "(%zu)\n", hdr_offset);
		return EIO;
	}

	/*
	 * Copy header of running firmware from SRAM to host memory to
	 * validate firmware
	 */
	hdr = kmalloc(bytes, M_DEVBUF, M_WAITOK);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res), hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * Check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		if_printf(sc->ifp, "Adopting fw %d.%d.%d: "
		    "working around rx filter bug\n",
		    sc->fw_ver_major, sc->fw_ver_minor, sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;

		/*
		 * Try to use the currently running firmware, if
		 * it is new enough
		 */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			if_printf(sc->ifp,
			    "failed to adopt running firmware\n");
			return status;
		}
		if_printf(sc->ifp, "Successfully adopted running firmware\n");

		if (sc->tx_boundary == 4096) {
			if_printf(sc->ifp,
			    "Using firmware currently running on NIC. "
			    "For optimal\n");
			if_printf(sc->ifp, "performance consider loading "
			    "optimized firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}

	/* Clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/*
	 * Send a reload command to the bootstrap MCP, and wait for the
	 * response in the confirmation address. The firmware should
	 * write a -1 there to indicate it is alive and well
	 */

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.dmem_busaddr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.dmem_busaddr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/*
	 * FIX: All newest firmware should un-protect the bottom of
	 * the sram before handoff. However, the very first interfaces
	 * do not. Therefore the handoff copy must skip the first 8 bytes
	 */
	buf[3] = htobe32(MXGE_FW_OFFSET + 8); /* where the code starts */
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000 * 10);
		i++;
	}
	if (*confirm != 0xffffffff) {
		if_printf(sc->ifp, "handoff failed (%p = 0x%x)",
		    confirm, *confirm);
		return ENXIO;
	}
	return 0;
}
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;

	cmd.data0 = (addr[0] << 24) | (addr[1] << 16) |
	    (addr[2] << 8) | addr[3];
	cmd.data1 = (addr[4] << 8) | (addr[5]);
	return mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, &cmd);
	if (status) {
		if_printf(sc->ifp, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	bzero(&cmd, sizeof(cmd));	/* avoid gcc warning */
	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, &cmd);
	if (status)
		if_printf(sc->ifp, "Failed to set promisc mode\n");
}
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists */
	bzero(&cmd, sizeof(cmd));	/* silence gcc warning */
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_ENABLE_ALLMULTI, "
		    "error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI) {
		/* Request to disable multicast filtering, so quit here */
		return;
	}

	/* Flush all the filters */
	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, "
		    "error status: %d\n", err);
		return;
	}

	/*
	 * Walk the multicast list, and add each address
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		    &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			if_printf(ifp, "Failed MXGEFW_JOIN_MULTICAST_GROUP, "
			    "error status: %d\n", err);
			/* Abort, leaving multicast filtering off */
			return;
		}
	}

	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		if_printf(ifp, "Failed MXGEFW_DISABLE_ALLMULTI, "
		    "error status: %d\n", err);
	}
}
#if 0
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/*
	 * Try to set nbufs to see if we can
	 * use virtually contiguous jumbos.
	 */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
	    &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
#endif
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status, rx_intr_size;

	/*
	 * Try to send a reset command to the card to see if it
	 * is alive
	 */
	memset(&cmd, 0, sizeof(cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		if_printf(sc->ifp, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/*
	 * Set the intrq size
	 * XXX assume 4byte mcp_slot
	 */
	rx_intr_size = sc->rx_intr_slots * sizeof(mcp_slot_t);
	cmd.data0 = rx_intr_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0. It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* Ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to get number of slices\n");
			return status;
		}

		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
		if (sc->num_tx_rings > 1)
			cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd);
		if (status != 0) {
			if_printf(sc->ifp, "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts */
		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];

			rx_done = &ss->rx_data.rx_done;
			memset(rx_done->entry, 0, rx_intr_size);

			cmd.data0 =
			    MXGE_LOWPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data1 =
			    MXGE_HIGHPART_TO_U32(ss->rx_done_dma.dmem_busaddr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA,
			    &cmd);
		}
	}

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET,
	    &cmd);
	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);

	if (status != 0) {
		if_printf(sc->ifp, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* Run a DMA benchmark */
	mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);

		/* Reset mcp/driver shared state back to 0 */
		ss->rx_data.rx_done.idx = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->rx_data.rx_big.cnt = 0;
		ss->rx_data.rx_small.cnt = 0;
		if (ss->fw_stats != NULL)
			bzero(ss->fw_stats, sizeof(*ss->fw_stats));
	}
	sc->rdma_tags_available = 15;

	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);

	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd))
			if_printf(sc->ifp, "can't enable throttle\n");
	}
	return status;
}
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0)
		return err;

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;

	ifnet_deserialize_all(sc->ifp);
	return err;
}
static int
mxge_change_use_rss(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	int err, use_rss;

	sc = arg1;
	use_rss = sc->use_rss;
	err = sysctl_handle_int(oidp, &use_rss, arg2, req);
	if (err != 0)
		return err;

	if (use_rss == sc->use_rss)
		return 0;

	ifnet_serialize_all(sc->ifp);

	sc->use_rss = use_rss;
	if (sc->ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc, 0);
		mxge_open(sc);
	}

	ifnet_deserialize_all(sc->ifp);
	return err;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0)
		return err;

	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000 * 1000)
		return EINVAL;

	ifnet_serialize_all(sc->ifp);

	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	ifnet_deserialize_all(sc->ifp);
	return err;
}
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	if (sc->ss != NULL) {
		struct mxge_slice_state *ss;
		int slice;

		for (slice = 0; slice < sc->num_slices; slice++) {
			ss = &sc->ss[slice];
			if (ss->sysctl_tree != NULL) {
				sysctl_ctx_free(&ss->sysctl_ctx);
				ss->sysctl_tree = NULL;
			}
		}
	}

	if (sc->slice_sysctl_tree != NULL) {
		sysctl_ctx_free(&sc->slice_sysctl_ctx);
		sc->slice_sysctl_tree = NULL;
	}
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/*
	 * Random information
	 */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
	    CTLFLAG_RD, &sc->fw_version, 0, "firmware version");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "serial_number",
	    CTLFLAG_RD, &sc->serial_number_string, 0, "serial number");

	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "product_code",
	    CTLFLAG_RD, &sc->product_code_string, 0, "product code");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "pcie_link_width",
	    CTLFLAG_RD, &sc->link_width, 0, "link width");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_boundary",
	    CTLFLAG_RD, &sc->tx_boundary, 0, "tx boundary");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_combine",
	    CTLFLAG_RD, &sc->wc, 0, "write combining PIO");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_dma_MBs",
	    CTLFLAG_RD, &sc->read_dma, 0, "DMA Read speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "write_dma_MBs",
	    CTLFLAG_RD, &sc->write_dma, 0, "DMA Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "read_write_dma_MBs",
	    CTLFLAG_RD, &sc->read_write_dma, 0,
	    "DMA concurrent Read/Write speed in MB/s");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "watchdog_resets",
	    CTLFLAG_RD, &sc->watchdog_resets, 0,
	    "Number of times NIC was reset");

	if (sc->num_slices > 1) {
		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "slice_cpumap",
		    CTLTYPE_OPAQUE | CTLFLAG_RD, sc->ring_map, 0,
		    if_ringmap_cpumap_sysctl, "I", "slice CPU map");
	}

	/*
	 * Performance related tunables
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal_delay",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, mxge_change_intr_coal, "I",
	    "Interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "throttle",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, mxge_change_throttle, "I",
	    "Transmit throttling");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "use_rss",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, mxge_change_use_rss, "I",
	    "Use RSS");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "deassert_wait",
	    CTLFLAG_RW, &mxge_deassert_wait, 0,
	    "Wait for IRQ line to go low in ihandler");

	/*
	 * Stats block from firmware is in network byte order.
	 * Need to swap it
	 */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "link_up",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->link_up, 0,
	    mxge_handle_be32, "I", "link up");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_tags_available",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->rdma_tags_available, 0,
	    mxge_handle_be32, "I", "rdma_tags_available");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_crc32",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_bad_crc32, 0,
	    mxge_handle_be32, "I", "dropped_bad_crc32");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_bad_phy",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_bad_phy, 0,
	    mxge_handle_be32, "I", "dropped_bad_phy");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_error_or_filtered",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_link_error_or_filtered, 0,
	    mxge_handle_be32, "I", "dropped_link_error_or_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_link_overflow",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_link_overflow, 0,
	    mxge_handle_be32, "I", "dropped_link_overflow");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_multicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_multicast_filtered, 0,
	    mxge_handle_be32, "I", "dropped_multicast_filtered");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_big_buffer",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_no_big_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_big_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_no_small_buffer",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_no_small_buffer, 0,
	    mxge_handle_be32, "I", "dropped_no_small_buffer");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_overrun",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_overrun, 0,
	    mxge_handle_be32, "I", "dropped_overrun");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_pause",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_pause, 0,
	    mxge_handle_be32, "I", "dropped_pause");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_runt",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_runt, 0,
	    mxge_handle_be32, "I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dropped_unicast_filtered",
	    CTLTYPE_INT | CTLFLAG_RD, &fw->dropped_unicast_filtered, 0,
	    mxge_handle_be32, "I", "dropped_unicast_filtered");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx,
	    children, OID_AUTO, "slice", CTLFLAG_RD, 0, "");
	if (sc->slice_sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add slice sysctl node\n");
		return;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
		    slice_num, CTLFLAG_RD, 0, "");
		if (ss->sysctl_tree == NULL) {
			device_printf(sc->dev,
			    "can't add %d slice sysctl node\n", slice);
			return;	/* XXX continue? */
		}
		children = SYSCTL_CHILDREN(ss->sysctl_tree);

		/*
		 * XXX change to ULONG
		 */

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_small_cnt",
		    CTLFLAG_RD, &ss->rx_data.rx_small.cnt, 0, "rx_small_cnt");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_big_cnt",
		    CTLFLAG_RD, &ss->rx_data.rx_big.cnt, 0, "rx_big_cnt");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_req",
		    CTLFLAG_RD, &ss->tx.req, 0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done",
		    CTLFLAG_RD, &ss->tx.done, 0, "tx_done");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_pkt_done",
		    CTLFLAG_RD, &ss->tx.pkt_done, 0, "tx_pkt_done");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_queue_active",
		    CTLFLAG_RD, &ss->tx.queue_active, 0, "tx_queue_active");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_activate",
		    CTLFLAG_RD, &ss->tx.activate, 0, "tx_activate");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_deactivate",
		    CTLFLAG_RD, &ss->tx.deactivate, 0, "tx_deactivate");
	}
}
/*
 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
 * backwards one at a time and handle ring wraps
 */
static __inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
    mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;

	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx], &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * Copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static __inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < cnt - 1; i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb();	/* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/*
		 * Submit all but the first request, and ensure
		 * that it is submitted below
		 */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* Submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb();	/* barrier before setting valid flag */
	}

	/* Re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
static int
mxge_pullup_tso(struct mbuf **mp)
{
	int hoff, iphlen, thoff;
	struct mbuf *m;

	m = *mp;
	KASSERT(M_WRITABLE(m), ("TSO mbuf not writable"));

	iphlen = m->m_pkthdr.csum_iphlen;
	thoff = m->m_pkthdr.csum_thlen;
	hoff = m->m_pkthdr.csum_lhlen;

	KASSERT(iphlen > 0, ("invalid ip hlen"));
	KASSERT(thoff > 0, ("invalid tcp hlen"));
	KASSERT(hoff > 0, ("invalid ether hlen"));

	if (__predict_false(m->m_len < hoff + iphlen + thoff)) {
		m = m_pullup(m, hoff + iphlen + thoff);
		if (m == NULL) {
			*mp = NULL;
			return ENOBUFS;
		}
		*mp = m;
	}
	return 0;
}
static int
mxge_encap_tso(mxge_tx_ring_t *tx, struct mxge_buffer_state *info_map,
    struct mbuf *m, int busdma_seg_cnt)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	struct mxge_buffer_state *info_last;
	bus_dmamap_t map = info_map->map;

	mss = m->m_pkthdr.tso_segsz;

	/*
	 * Negative cum_len signifies to the send loop that we are
	 * still in the header portion of the TSO packet.
	 */
	cum_len = -(m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen +
	    m->m_pkthdr.csum_thlen);
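	/*
	 * E.g. for a plain Ethernet + IPv4 + TCP header (no options)
	 * cum_len starts at -(14 + 20 + 20) = -54 and crosses zero
	 * exactly where the payload begins, which is what the send
	 * loop below keys off of.
	 */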

	/*
	 * TSO implies checksum offload on this hardware
	 */
	cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/*
	 * For TSO, pseudo_hdr_offset holds mss. The firmware figures
	 * out where to put the checksum by parsing the header.
	 */
	pseudo_hdr_offset = htobe16(mss);

	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;

	/*
	 * "rdma_count" is the number of RDMAs belonging to the current
	 * packet BEFORE the current send request. For non-TSO packets,
	 * this is equal to "count".
	 *
	 * For TSO packets, rdma_count needs to be reset to 0 after a
	 * segment cut.
	 *
	 * The rdma_count field of the send request is the number of
	 * RDMAs of the packet starting at that request. For TSO send
	 * requests with one or more cuts in the middle, this is the
	 * number of RDMAs starting after the last cut in the request.
	 * All previous segments before the last cut implicitly have 1
	 * RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand, it must be
	 * filled-in retroactively - after each segmentation cut or at
	 * the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/*
		 * Break the busdma segment up into pieces
		 */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req - rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* Payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |=
				    next_is_first * MXGEFW_FLAGS_FIRST;
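				/*
				 * Branchless bookkeeping:
				 * -(chop | next_is_first) is 0 or ~0, so
				 * any cut resets rdma_count to -1, and the
				 * (chop & !next_is_first) term bumps it
				 * back to 0 when the cut lands inside this
				 * request.
				 */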
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* Header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
				    MXGEFW_FLAGS_FIRST |
				    (small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags =
			    flags | ((cum_len & 1) * MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req - rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	return ENOBUFS;
}
static int
mxge_encap(mxge_tx_ring_t *tx, struct mbuf *m, bus_addr_t zeropad)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	bus_dmamap_t map;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;
	struct mxge_buffer_state *info_map, *info_last;

	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		err = mxge_pullup_tso(&m);
		if (__predict_false(err))
			return err;
	}

	/*
	 * Map the frame for DMA
	 */
	idx = tx->req & tx->mask;
	info_map = &tx->info[idx];
	map = info_map->map;

	err = bus_dmamap_load_mbuf_defrag(tx->dmat, map, &m,
	    tx->seg_list, tx->max_desc - 2, &cnt, BUS_DMA_NOWAIT);
	if (__predict_false(err != 0))
		goto drop;
	bus_dmamap_sync(tx->dmat, map, BUS_DMASYNC_PREWRITE);

	/*
	 * TSO is different enough, we handle it in another routine
	 */
	if (m->m_pkthdr.csum_flags & CSUM_TSO)
		return mxge_encap_tso(tx, info_map, m, cnt);

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/*
	 * Checksum offloading
	 */
	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		cksum_offset = m->m_pkthdr.csum_lhlen + m->m_pkthdr.csum_iphlen;
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/*
	 * Convert segments into a request list
	 */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0;	/* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;

	/*
	 * Pad runt to 60 bytes
	 */
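	/*
	 * 60 bytes is the minimum Ethernet frame length (64) less the
	 * 4-byte FCS, which the NIC appends itself; the missing bytes
	 * are DMA'd from the shared zero-filled pad buffer.
	 */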
	if (cum_len < 60) {
		req++;
		req->addr_low = htobe32(MXGE_LOWPART_TO_U32(zeropad));
		req->addr_high = htobe32(MXGE_HIGHPART_TO_U32(zeropad));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0;	/* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	info_last = &tx->info[((cnt - 1) + tx->req) & tx->mask];

	info_map->map = info_last->map;
	info_last->map = map;
	info_last->m = m;

	mxge_submit_req(tx, tx->req_list, cnt);

	if (tx->send_go != NULL && tx->queue_active == 0) {
		/* Tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
	return 0;

drop:
	m_freem(m);
	return err;
}
static void
mxge_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);
	bus_addr_t zeropad;
	int encap = 0;

	KKASSERT(tx->ifsq == ifsq);
	ASSERT_SERIALIZED(&tx->tx_serialize);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	zeropad = sc->zeropad_dma.dmem_busaddr;
	while (tx->mask - (tx->req - tx->done) > tx->max_desc) {
		struct mbuf *m;
		int error;

		m = ifsq_dequeue(ifsq);
		if (m == NULL)
			goto done;

		BPF_MTAP(ifp, m);
		error = mxge_encap(tx, m, zeropad);
		if (!error)
			encap = 1;
		else
			IFNET_STAT_INC(ifp, oerrors, 1);
	}

	/* Ran out of transmit slots */
	ifsq_set_oactive(ifsq);
done:
	if (encap)
		ifsq_watchdog_set_count(&tx->watchdog, 5);
}
static void
mxge_watchdog(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct mxge_softc *sc = ifp->if_softc;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	mxge_tx_ring_t *tx = ifsq_get_priv(ifsq);

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/* Check for pause blocking before resetting */
	if (tx->watchdog_rx_pause == rx_pause) {
		mxge_warn_stuck(sc, tx, 0);
		mxge_watchdog_reset(sc);
		return;
	} else {
		if_printf(ifp, "Flow control blocking xmits, "
		    "check link partner\n");
	}
	tx->watchdog_rx_pause = rx_pause;
}
/*
 * Copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic. We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
2022 static __inline void
2023 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2024 mcp_kreq_ether_recv_t *src)
2026 uint32_t low;
2028 low = src->addr_low;
2029 src->addr_low = 0xffffffff;
2030 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2031 wmb();
2032 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2033 wmb();
2034 src->addr_low = low;
2035 dst->addr_low = low;
2036 wmb();
2039 static int
2040 mxge_get_buf_small(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2041 boolean_t init)
2043 bus_dma_segment_t seg;
2044 struct mbuf *m;
2045 int cnt, err, mflag;
2047 mflag = M_NOWAIT;
2048 if (__predict_false(init))
2049 mflag = M_WAITOK;
2051 m = m_gethdr(mflag, MT_DATA);
2052 if (m == NULL) {
2053 err = ENOBUFS;
2054 if (__predict_false(init)) {
2055 /*
2056 * During initialization, there
2057 * is nothing to setup; bail out
2058 */
2059 return err;
2060 }
2061 goto done;
2062 }
2063 m->m_len = m->m_pkthdr.len = MHLEN;
2065 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2066 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2067 if (err != 0) {
2068 m_freem(m);
2069 if (__predict_false(init)) {
2070 /*
2071 * During initialization, there
2072 * is nothing to setup; bail out
2073 */
2074 return err;
2075 }
2076 goto done;
2077 }
2079 rx->info[idx].m = m;
2080 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2081 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2083 done:
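/*
 * Descriptors reach the NIC only in aligned batches of 8;
 * see mxge_submit_8rx() above.
 */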
2084 if ((idx & 7) == 7)
2085 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2086 return err;
2089 static int
2090 mxge_get_buf_big(mxge_rx_ring_t *rx, bus_dmamap_t map, int idx,
2091 boolean_t init)
2093 bus_dma_segment_t seg;
2094 struct mbuf *m;
2095 int cnt, err, mflag;
2097 mflag = M_NOWAIT;
2098 if (__predict_false(init))
2099 mflag = M_WAITOK;
2101 if (rx->cl_size == MCLBYTES)
2102 m = m_getcl(mflag, MT_DATA, M_PKTHDR);
2103 else
2104 m = m_getjcl(mflag, MT_DATA, M_PKTHDR, MJUMPAGESIZE);
2105 if (m == NULL) {
2106 err = ENOBUFS;
2107 if (__predict_false(init)) {
2108 /*
2109 * During initialization, there
2110 * is nothing to setup; bail out
2111 */
2112 return err;
2113 }
2114 goto done;
2115 }
2116 m->m_len = m->m_pkthdr.len = rx->cl_size;
2118 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2119 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2120 if (err != 0) {
2121 m_freem(m);
2122 if (__predict_false(init)) {
2123 /*
2124 * During initialization, there
2125 * is nothing to setup; bail out
2126 */
2127 return err;
2128 }
2129 goto done;
2130 }
2132 rx->info[idx].m = m;
2133 rx->shadow[idx].addr_low = htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2134 rx->shadow[idx].addr_high = htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2136 done:
2137 if ((idx & 7) == 7)
2138 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2139 return err;
2140 }
2142 /*
2143 * Myri10GE hardware checksums are not valid if the sender
2144 * padded the frame with non-zero padding. This is because
2145 * the firmware just does a simple 16-bit 1s complement
2146 * checksum across the entire frame, excluding the first 14
2147 * bytes. It is best to simply check the checksum and
2148 * tell the stack about it only if the checksum is good.
2149 */
2150 static __inline uint16_t
2151 mxge_rx_csum(struct mbuf *m, int csum)
2153 const struct ether_header *eh;
2154 const struct ip *ip;
2155 uint16_t c;
2157 eh = mtod(m, const struct ether_header *);
2159 /* Only deal with IPv4 TCP & UDP for now */
2160 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2161 return 1;
2163 ip = (const struct ip *)(eh + 1);
2164 if (__predict_false(ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP))
2165 return 1;
2167 #ifdef INET
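/*
 * The firmware's partial sum also covers the IP header, but a
 * valid IP header sums to 0xffff (minus zero in one's complement
 * arithmetic), so it drops out. Folding in the pseudo-header
 * fields (addresses, protocol, and the TCP/UDP length, i.e.
 * ip_len minus the IP header length) makes a correct packet sum
 * to 0xffff; after the final complement below, a return value
 * of 0 means the checksum verified.
 */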
2168 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2169 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2170 (ip->ip_hl << 2) + ip->ip_p));
2171 #else
2172 c = 1;
2173 #endif
2174 c ^= 0xffff;
2175 return c;
2178 static void
2179 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2181 struct ether_vlan_header *evl;
2182 uint32_t partial;
2184 evl = mtod(m, struct ether_vlan_header *);
2186 /*
2187 * Fix checksum by subtracting EVL_ENCAPLEN bytes after
2188 * what the firmware thought was the end of the ethernet
2189 * header.
2190 */
2192 /* Put checksum into host byte order */
2193 *csum = ntohs(*csum);
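/*
 * One's complement subtraction: adding ~partial removes the four
 * bytes (VLAN TCI plus encapsulated type) that the decapsulation
 * below strips out of the checksummed region; the next lines fold
 * the carry and reduce the 32-bit accumulator back to 16 bits.
 */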
2195 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2196 *csum += ~partial;
2197 *csum += ((*csum) < ~partial);
2198 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2199 *csum = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2201 /*
2202 * Restore checksum to network byte order;
2203 * later consumers expect this
2204 */
2205 *csum = htons(*csum);
2207 /* save the tag */
2208 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2209 m->m_flags |= M_VLANTAG;
2211 /*
2212 * Remove the 802.1q header by copying the Ethernet
2213 * addresses over it and adjusting the beginning of
2214 * the data in the mbuf. The encapsulated Ethernet
2215 * type field is already in place.
2216 */
2217 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2218 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2219 m_adj(m, EVL_ENCAPLEN);
2220 }
2223 static __inline void
2224 mxge_rx_done_big(struct ifnet *ifp, mxge_rx_ring_t *rx,
2225 uint32_t len, uint32_t csum)
2227 struct mbuf *m;
2228 const struct ether_header *eh;
2229 bus_dmamap_t old_map;
2230 int idx;
2232 idx = rx->cnt & rx->mask;
2233 rx->cnt++;
2235 /* Save a pointer to the received mbuf */
2236 m = rx->info[idx].m;
2238 /* Try to replace the received mbuf */
2239 if (mxge_get_buf_big(rx, rx->extra_map, idx, FALSE)) {
2240 /* Drop the frame -- the old mbuf is re-cycled */
2241 IFNET_STAT_INC(ifp, ierrors, 1);
2242 return;
2245 /* Unmap the received buffer */
2246 old_map = rx->info[idx].map;
2247 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2248 bus_dmamap_unload(rx->dmat, old_map);
2250 /* Swap the bus_dmamap_t's */
2251 rx->info[idx].map = rx->extra_map;
2252 rx->extra_map = old_map;
2254 /*
2255 * mcp implicitly skips 1st 2 bytes so that packet is properly
2256 * aligned
2257 */
2258 m->m_data += MXGEFW_PAD;
2260 m->m_pkthdr.rcvif = ifp;
2261 m->m_len = m->m_pkthdr.len = len;
2263 IFNET_STAT_INC(ifp, ipackets, 1);
2265 eh = mtod(m, const struct ether_header *);
2266 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2267 mxge_vlan_tag_remove(m, &csum);
2269 /* If the checksum is valid, mark it in the mbuf header */
2270 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2271 mxge_rx_csum(m, csum) == 0) {
2272 /* Tell the stack that the checksum is good */
2273 m->m_pkthdr.csum_data = 0xffff;
2274 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2275 CSUM_DATA_VALID;
2276 }
2277 ifp->if_input(ifp, m, NULL, -1);
2278 }
2280 static __inline void
2281 mxge_rx_done_small(struct ifnet *ifp, mxge_rx_ring_t *rx,
2282 uint32_t len, uint32_t csum)
2284 const struct ether_header *eh;
2285 struct mbuf *m;
2286 bus_dmamap_t old_map;
2287 int idx;
2289 idx = rx->cnt & rx->mask;
2290 rx->cnt++;
2292 /* Save a pointer to the received mbuf */
2293 m = rx->info[idx].m;
2295 /* Try to replace the received mbuf */
2296 if (mxge_get_buf_small(rx, rx->extra_map, idx, FALSE)) {
2297 /* Drop the frame -- the old mbuf is re-cycled */
2298 IFNET_STAT_INC(ifp, ierrors, 1);
2299 return;
2302 /* Unmap the received buffer */
2303 old_map = rx->info[idx].map;
2304 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2305 bus_dmamap_unload(rx->dmat, old_map);
2307 /* Swap the bus_dmamap_t's */
2308 rx->info[idx].map = rx->extra_map;
2309 rx->extra_map = old_map;
2311 /*
2312 * mcp implicitly skips 1st 2 bytes so that packet is properly
2313 * aligned
2314 */
2315 m->m_data += MXGEFW_PAD;
2317 m->m_pkthdr.rcvif = ifp;
2318 m->m_len = m->m_pkthdr.len = len;
2320 IFNET_STAT_INC(ifp, ipackets, 1);
2322 eh = mtod(m, const struct ether_header *);
2323 if (eh->ether_type == htons(ETHERTYPE_VLAN))
2324 mxge_vlan_tag_remove(m, &csum);
2326 /* If the checksum is valid, mark it in the mbuf header */
2327 if ((ifp->if_capenable & IFCAP_RXCSUM) &&
2328 mxge_rx_csum(m, csum) == 0) {
2329 /* Tell the stack that the checksum is good */
2330 m->m_pkthdr.csum_data = 0xffff;
2331 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
2332 CSUM_DATA_VALID;
2333 }
2334 ifp->if_input(ifp, m, NULL, -1);
2335 }
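/*
 * Drain the receive completion ring. A negative "cycle", as
 * passed by the interrupt handlers, effectively means no budget:
 * keep going until the ring is empty. The polling path passes
 * its burst size instead.
 */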
2337 static __inline void
2338 mxge_clean_rx_done(struct ifnet *ifp, struct mxge_rx_data *rx_data, int cycle)
2340 mxge_rx_done_t *rx_done = &rx_data->rx_done;
2342 while (rx_done->entry[rx_done->idx].length != 0 && cycle != 0) {
2343 uint16_t length, checksum;
2345 length = ntohs(rx_done->entry[rx_done->idx].length);
2346 rx_done->entry[rx_done->idx].length = 0;
2348 checksum = rx_done->entry[rx_done->idx].checksum;
2350 if (length <= MXGE_RX_SMALL_BUFLEN) {
2351 mxge_rx_done_small(ifp, &rx_data->rx_small,
2352 length, checksum);
2353 } else {
2354 mxge_rx_done_big(ifp, &rx_data->rx_big,
2355 length, checksum);
2358 rx_done->idx++;
2359 rx_done->idx &= rx_done->mask;
2360 --cycle;
2361 }
2362 }
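/*
 * Reclaim transmit descriptors the firmware has finished with;
 * mcp_idx is the firmware's send_done_count. Only the first
 * descriptor of a packet carries the mbuf and DMA map to free.
 */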
2364 static __inline void
2365 mxge_tx_done(struct ifnet *ifp, mxge_tx_ring_t *tx, uint32_t mcp_idx)
2367 ASSERT_SERIALIZED(&tx->tx_serialize);
2369 while (tx->pkt_done != mcp_idx) {
2370 struct mbuf *m;
2371 int idx;
2373 idx = tx->done & tx->mask;
2374 tx->done++;
2376 m = tx->info[idx].m;
2377 /*
2378 * The mbuf and DMA map are attached only to the first
2379 * segment of each packet.
2380 */
2381 if (m != NULL) {
2382 tx->pkt_done++;
2383 IFNET_STAT_INC(ifp, opackets, 1);
2384 tx->info[idx].m = NULL;
2385 bus_dmamap_unload(tx->dmat, tx->info[idx].map);
2386 m_freem(m);
2387 }
2388 }
2390 /*
2391 * If we have space, clear OACTIVE to tell the stack that
2392 * it's OK to send packets
2393 */
2394 if (tx->req - tx->done < (tx->mask + 1) / 2) {
2395 ifsq_clr_oactive(tx->ifsq);
2396 if (tx->req == tx->done) {
2397 /* Reset watchdog */
2398 ifsq_watchdog_set_count(&tx->watchdog, 0);
2402 if (!ifsq_is_empty(tx->ifsq))
2403 ifsq_devstart(tx->ifsq);
2405 if (tx->send_stop != NULL && tx->req == tx->done) {
2406 /*
2407 * Let the NIC stop polling this queue, since there
2408 * are no more transmits pending
2409 */
2410 *tx->send_stop = 1;
2411 tx->queue_active = 0;
2412 tx->deactivate++;
2413 wmb();
2417 static struct mxge_media_type mxge_xfp_media_types[] = {
2418 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2419 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2420 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2421 {IFM_NONE, (1 << 5), "10GBASE-ER"},
2422 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2423 {IFM_NONE, (1 << 3), "10GBASE-SW"},
2424 {IFM_NONE, (1 << 2), "10GBASE-LW"},
2425 {IFM_NONE, (1 << 1), "10GBASE-EW"},
2426 {IFM_NONE, (1 << 0), "Reserved"}
2429 static struct mxge_media_type mxge_sfp_media_types[] = {
2430 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"},
2431 {IFM_NONE, (1 << 7), "Reserved"},
2432 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2433 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2434 {IFM_10G_SR, (1 << 4), "10GBASE-SR"},
2435 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"}
2438 static void
2439 mxge_media_set(mxge_softc_t *sc, int media_type)
2441 int fc_opt = 0;
2443 if (media_type == IFM_NONE)
2444 return;
2446 if (sc->pause)
2447 fc_opt = IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
2449 ifmedia_add(&sc->media, MXGE_IFM | media_type, 0, NULL);
2450 ifmedia_set(&sc->media, MXGE_IFM | media_type | fc_opt);
2452 sc->current_media = media_type;
2455 static void
2456 mxge_media_unset(mxge_softc_t *sc)
2458 ifmedia_removeall(&sc->media);
2459 sc->current_media = IFM_NONE;
2462 static void
2463 mxge_media_init(mxge_softc_t *sc)
2465 const char *ptr;
2466 int i;
2468 mxge_media_unset(sc);
2470 /*
2471 * Parse the product code to determine the interface type
2472 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2473 * after the 3rd dash in the driver's cached copy of the
2474 * EEPROM's product code string.
2475 */
2476 ptr = sc->product_code_string;
2477 if (ptr == NULL) {
2478 if_printf(sc->ifp, "Missing product code\n");
2479 return;
2482 for (i = 0; i < 3; i++, ptr++) {
2483 ptr = strchr(ptr, '-');
2484 if (ptr == NULL) {
2485 if_printf(sc->ifp, "only %d dashes in PC?!?\n", i);
2486 return;
2489 if (*ptr == 'C' || *(ptr +1) == 'C') {
2490 /* -C is CX4 */
2491 sc->connector = MXGE_CX4;
2492 mxge_media_set(sc, IFM_10G_CX4);
2493 } else if (*ptr == 'Q') {
2494 /* -Q is Quad Ribbon Fiber */
2495 sc->connector = MXGE_QRF;
2496 if_printf(sc->ifp, "Quad Ribbon Fiber Media\n");
2497 /* DragonFly has no media type for Quad ribbon fiber */
2498 } else if (*ptr == 'R') {
2499 /* -R is XFP */
2500 sc->connector = MXGE_XFP;
2501 /* NOTE: ifmedia will be installed later */
2502 } else if (*ptr == 'S' || *(ptr +1) == 'S') {
2503 /* -S or -2S is SFP+ */
2504 sc->connector = MXGE_SFP;
2505 /* NOTE: ifmedia will be installed later */
2506 } else {
2507 sc->connector = MXGE_UNK;
2508 if_printf(sc->ifp, "Unknown media type: %c\n", *ptr);
2509 }
2510 }
2512 /*
2513 * Determine the media type for a NIC. Some XFPs will identify
2514 * themselves only when their link is up, so this is initiated via a
2515 * link up interrupt. However, this can potentially take up to
2516 * several milliseconds, so it is run via the watchdog routine, rather
2517 * than in the interrupt handler itself.
2518 */
2519 static void
2520 mxge_media_probe(mxge_softc_t *sc)
2522 mxge_cmd_t cmd;
2523 const char *cage_type;
2524 struct mxge_media_type *mxge_media_types = NULL;
2525 int i, err, ms, mxge_media_type_entries;
2526 uint32_t byte;
2528 sc->need_media_probe = 0;
2530 if (sc->connector == MXGE_XFP) {
2531 /* -R is XFP */
2532 mxge_media_types = mxge_xfp_media_types;
2533 mxge_media_type_entries = NELEM(mxge_xfp_media_types);
2534 byte = MXGE_XFP_COMPLIANCE_BYTE;
2535 cage_type = "XFP";
2536 } else if (sc->connector == MXGE_SFP) {
2537 /* -S or -2S is SFP+ */
2538 mxge_media_types = mxge_sfp_media_types;
2539 mxge_media_type_entries = NELEM(mxge_sfp_media_types);
2540 cage_type = "SFP+";
2541 byte = 3;
2542 } else {
2543 /* nothing to do; media type cannot change */
2544 return;
2545 }
2547 /*
2548 * At this point we know the NIC has an XFP cage, so now we
2549 * try to determine what is in the cage by using the
2550 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2551 * register. We read just one byte, which may take over
2552 * a millisecond.
2553 */
2555 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
2556 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2557 cmd.data1 = byte;
2558 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2559 if (err != MXGEFW_CMD_OK) {
2560 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE)
2561 if_printf(sc->ifp, "failed to read XFP\n");
2562 else if (err == MXGEFW_CMD_ERROR_I2C_ABSENT)
2563 if_printf(sc->ifp, "Type R/S with no XFP!?!?\n");
2564 else
2565 if_printf(sc->ifp, "I2C read failed, err: %d\n", err);
2566 mxge_media_unset(sc);
2567 return;
2570 /* Now we wait for the data to be cached */
2571 cmd.data0 = byte;
2572 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2573 for (ms = 0; err == EBUSY && ms < 50; ms++) {
2574 DELAY(1000);
2575 cmd.data0 = byte;
2576 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2578 if (err != MXGEFW_CMD_OK) {
2579 if_printf(sc->ifp, "failed to read %s (%d, %dms)\n",
2580 cage_type, err, ms);
2581 mxge_media_unset(sc);
2582 return;
2585 if (cmd.data0 == mxge_media_types[0].bitmask) {
2586 if (bootverbose) {
2587 if_printf(sc->ifp, "%s:%s\n", cage_type,
2588 mxge_media_types[0].name);
2590 if (sc->current_media != mxge_media_types[0].flag) {
2591 mxge_media_unset(sc);
2592 mxge_media_set(sc, mxge_media_types[0].flag);
2594 return;
2596 for (i = 1; i < mxge_media_type_entries; i++) {
2597 if (cmd.data0 & mxge_media_types[i].bitmask) {
2598 if (bootverbose) {
2599 if_printf(sc->ifp, "%s:%s\n", cage_type,
2600 mxge_media_types[i].name);
2603 if (sc->current_media != mxge_media_types[i].flag) {
2604 mxge_media_unset(sc);
2605 mxge_media_set(sc, mxge_media_types[i].flag);
2607 return;
2610 mxge_media_unset(sc);
2611 if (bootverbose) {
2612 if_printf(sc->ifp, "%s media 0x%x unknown\n", cage_type,
2613 cmd.data0);
2617 static void
2618 mxge_intr_status(struct mxge_softc *sc, const mcp_irq_data_t *stats)
2620 if (sc->link_state != stats->link_up) {
2621 sc->link_state = stats->link_up;
2622 if (sc->link_state) {
2623 sc->ifp->if_link_state = LINK_STATE_UP;
2624 if_link_state_change(sc->ifp);
2625 if (bootverbose)
2626 if_printf(sc->ifp, "link up\n");
2627 } else {
2628 sc->ifp->if_link_state = LINK_STATE_DOWN;
2629 if_link_state_change(sc->ifp);
2630 if (bootverbose)
2631 if_printf(sc->ifp, "link down\n");
2633 sc->need_media_probe = 1;
2636 if (sc->rdma_tags_available != be32toh(stats->rdma_tags_available)) {
2637 sc->rdma_tags_available = be32toh(stats->rdma_tags_available);
2638 if_printf(sc->ifp, "RDMA timed out! %d tags left\n",
2639 sc->rdma_tags_available);
2642 if (stats->link_down) {
2643 sc->down_cnt += stats->link_down;
2644 sc->link_state = 0;
2645 sc->ifp->if_link_state = LINK_STATE_DOWN;
2646 if_link_state_change(sc->ifp);
2650 static void
2651 mxge_serialize_skipmain(struct mxge_softc *sc)
2653 lwkt_serialize_array_enter(sc->serializes, sc->nserialize, 1);
2656 static void
2657 mxge_deserialize_skipmain(struct mxge_softc *sc)
2659 lwkt_serialize_array_exit(sc->serializes, sc->nserialize, 1);
2662 static void
2663 mxge_legacy(void *arg)
2665 struct mxge_slice_state *ss = arg;
2666 mxge_softc_t *sc = ss->sc;
2667 mcp_irq_data_t *stats = ss->fw_stats;
2668 mxge_tx_ring_t *tx = &ss->tx;
2669 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2670 uint32_t send_done_count;
2671 uint8_t valid;
2673 ASSERT_SERIALIZED(&sc->main_serialize);
2675 /* Make sure the DMA has finished */
2676 if (!stats->valid)
2677 return;
2678 valid = stats->valid;
2680 /* Lower legacy IRQ */
2681 *sc->irq_deassert = 0;
2682 if (!mxge_deassert_wait) {
2683 /* Don't wait for conf. that irq is low */
2684 stats->valid = 0;
2687 mxge_serialize_skipmain(sc);
2689 /*
2690 * Loop while waiting for legacy irq deassertion
2691 * XXX do we really want to loop?
2692 */
2693 do {
2694 /* Check for transmit completes and receives */
2695 send_done_count = be32toh(stats->send_done_count);
2696 while ((send_done_count != tx->pkt_done) ||
2697 (rx_done->entry[rx_done->idx].length != 0)) {
2698 if (send_done_count != tx->pkt_done) {
2699 mxge_tx_done(&sc->arpcom.ac_if, tx,
2700 (int)send_done_count);
2702 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2703 send_done_count = be32toh(stats->send_done_count);
2705 if (mxge_deassert_wait)
2706 wmb();
2707 } while (*((volatile uint8_t *)&stats->valid));
2709 mxge_deserialize_skipmain(sc);
2711 /* Fw link & error stats meaningful only on the first slice */
2712 if (__predict_false(stats->stats_updated))
2713 mxge_intr_status(sc, stats);
2715 /* Check to see if we have rx token to pass back */
2716 if (valid & 0x1)
2717 *ss->irq_claim = be32toh(3);
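/*
 * NOTE: the unconditional write to the second claim register
 * appears to re-arm interrupt generation; it is performed on
 * every pass, whether or not an rx token is returned.
 */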
2718 *(ss->irq_claim + 1) = be32toh(3);
2721 static void
2722 mxge_msi(void *arg)
2724 struct mxge_slice_state *ss = arg;
2725 mxge_softc_t *sc = ss->sc;
2726 mcp_irq_data_t *stats = ss->fw_stats;
2727 mxge_tx_ring_t *tx = &ss->tx;
2728 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2729 uint32_t send_done_count;
2730 uint8_t valid;
2731 #ifndef IFPOLL_ENABLE
2732 const boolean_t polling = FALSE;
2733 #else
2734 boolean_t polling = FALSE;
2735 #endif
2737 ASSERT_SERIALIZED(&sc->main_serialize);
2739 /* Make sure the DMA has finished */
2740 if (__predict_false(!stats->valid))
2741 return;
2743 valid = stats->valid;
2744 stats->valid = 0;
2746 #ifdef IFPOLL_ENABLE
2747 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2748 polling = TRUE;
2749 #endif
2751 if (!polling) {
2752 /* Check for receives */
2753 lwkt_serialize_enter(&ss->rx_data.rx_serialize);
2754 if (rx_done->entry[rx_done->idx].length != 0)
2755 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2756 lwkt_serialize_exit(&ss->rx_data.rx_serialize);
2757 }
2759 /*
2760 * Check for transmit completes
2761 *
2762 * NOTE:
2763 * Since pkt_done is only changed by mxge_tx_done(),
2764 * which is called only in the interrupt handler, the
2765 * check w/o holding the tx serializer is MPSAFE.
2766 */
2767 send_done_count = be32toh(stats->send_done_count);
2768 if (send_done_count != tx->pkt_done) {
2769 lwkt_serialize_enter(&tx->tx_serialize);
2770 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2771 lwkt_serialize_exit(&tx->tx_serialize);
2774 if (__predict_false(stats->stats_updated))
2775 mxge_intr_status(sc, stats);
2777 /* Check to see if we have rx token to pass back */
2778 if (!polling && (valid & 0x1))
2779 *ss->irq_claim = be32toh(3);
2780 *(ss->irq_claim + 1) = be32toh(3);
2783 static void
2784 mxge_msix_rx(void *arg)
2786 struct mxge_slice_state *ss = arg;
2787 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2789 #ifdef IFPOLL_ENABLE
2790 if (ss->sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2791 return;
2792 #endif
2794 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2796 if (rx_done->entry[rx_done->idx].length != 0)
2797 mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, -1);
2799 *ss->irq_claim = be32toh(3);
2802 static void
2803 mxge_msix_rxtx(void *arg)
2805 struct mxge_slice_state *ss = arg;
2806 mxge_softc_t *sc = ss->sc;
2807 mcp_irq_data_t *stats = ss->fw_stats;
2808 mxge_tx_ring_t *tx = &ss->tx;
2809 mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;
2810 uint32_t send_done_count;
2811 uint8_t valid;
2812 #ifndef IFPOLL_ENABLE
2813 const boolean_t polling = FALSE;
2814 #else
2815 boolean_t polling = FALSE;
2816 #endif
2818 ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);
2820 /* Make sure the DMA has finished */
2821 if (__predict_false(!stats->valid))
2822 return;
2824 valid = stats->valid;
2825 stats->valid = 0;
2827 #ifdef IFPOLL_ENABLE
2828 if (sc->arpcom.ac_if.if_flags & IFF_NPOLLING)
2829 polling = TRUE;
2830 #endif
2832 /* Check for receives */
2833 if (!polling && rx_done->entry[rx_done->idx].length != 0)
2834 mxge_clean_rx_done(&sc->arpcom.ac_if, &ss->rx_data, -1);
2836 /*
2837 * Check for transmit completes
2838 *
2839 * NOTE:
2840 * Since pkt_done is only changed by mxge_tx_done(),
2841 * which is called only in the interrupt handler, the
2842 * check w/o holding the tx serializer is MPSAFE.
2843 */
2844 send_done_count = be32toh(stats->send_done_count);
2845 if (send_done_count != tx->pkt_done) {
2846 lwkt_serialize_enter(&tx->tx_serialize);
2847 mxge_tx_done(&sc->arpcom.ac_if, tx, (int)send_done_count);
2848 lwkt_serialize_exit(&tx->tx_serialize);
2851 /* Check to see if we have rx token to pass back */
2852 if (!polling && (valid & 0x1))
2853 *ss->irq_claim = be32toh(3);
2854 *(ss->irq_claim + 1) = be32toh(3);
2857 static void
2858 mxge_init(void *arg)
2860 struct mxge_softc *sc = arg;
2862 ASSERT_IFNET_SERIALIZED_ALL(sc->ifp);
2863 if ((sc->ifp->if_flags & IFF_RUNNING) == 0)
2864 mxge_open(sc);
2867 static void
2868 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2870 int i;
2872 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2873 if (ss->rx_data.rx_big.info[i].m == NULL)
2874 continue;
2875 bus_dmamap_unload(ss->rx_data.rx_big.dmat,
2876 ss->rx_data.rx_big.info[i].map);
2877 m_freem(ss->rx_data.rx_big.info[i].m);
2878 ss->rx_data.rx_big.info[i].m = NULL;
2881 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2882 if (ss->rx_data.rx_small.info[i].m == NULL)
2883 continue;
2884 bus_dmamap_unload(ss->rx_data.rx_small.dmat,
2885 ss->rx_data.rx_small.info[i].map);
2886 m_freem(ss->rx_data.rx_small.info[i].m);
2887 ss->rx_data.rx_small.info[i].m = NULL;
2890 /* Transmit ring used only on the first slice */
2891 if (ss->tx.info == NULL)
2892 return;
2894 for (i = 0; i <= ss->tx.mask; i++) {
2895 if (ss->tx.info[i].m == NULL)
2896 continue;
2897 bus_dmamap_unload(ss->tx.dmat, ss->tx.info[i].map);
2898 m_freem(ss->tx.info[i].m);
2899 ss->tx.info[i].m = NULL;
2903 static void
2904 mxge_free_mbufs(mxge_softc_t *sc)
2906 int slice;
2908 for (slice = 0; slice < sc->num_slices; slice++)
2909 mxge_free_slice_mbufs(&sc->ss[slice]);
2912 static void
2913 mxge_free_slice_rings(struct mxge_slice_state *ss)
2915 int i;
2917 if (ss->rx_data.rx_done.entry != NULL) {
2918 mxge_dma_free(&ss->rx_done_dma);
2919 ss->rx_data.rx_done.entry = NULL;
2922 if (ss->tx.req_list != NULL) {
2923 kfree(ss->tx.req_list, M_DEVBUF);
2924 ss->tx.req_list = NULL;
2927 if (ss->tx.seg_list != NULL) {
2928 kfree(ss->tx.seg_list, M_DEVBUF);
2929 ss->tx.seg_list = NULL;
2932 if (ss->rx_data.rx_small.shadow != NULL) {
2933 kfree(ss->rx_data.rx_small.shadow, M_DEVBUF);
2934 ss->rx_data.rx_small.shadow = NULL;
2937 if (ss->rx_data.rx_big.shadow != NULL) {
2938 kfree(ss->rx_data.rx_big.shadow, M_DEVBUF);
2939 ss->rx_data.rx_big.shadow = NULL;
2942 if (ss->tx.info != NULL) {
2943 if (ss->tx.dmat != NULL) {
2944 for (i = 0; i <= ss->tx.mask; i++) {
2945 bus_dmamap_destroy(ss->tx.dmat,
2946 ss->tx.info[i].map);
2948 bus_dma_tag_destroy(ss->tx.dmat);
2950 kfree(ss->tx.info, M_DEVBUF);
2951 ss->tx.info = NULL;
2954 if (ss->rx_data.rx_small.info != NULL) {
2955 if (ss->rx_data.rx_small.dmat != NULL) {
2956 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
2957 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2958 ss->rx_data.rx_small.info[i].map);
2960 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
2961 ss->rx_data.rx_small.extra_map);
2962 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
2964 kfree(ss->rx_data.rx_small.info, M_DEVBUF);
2965 ss->rx_data.rx_small.info = NULL;
2968 if (ss->rx_data.rx_big.info != NULL) {
2969 if (ss->rx_data.rx_big.dmat != NULL) {
2970 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
2971 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2972 ss->rx_data.rx_big.info[i].map);
2974 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
2975 ss->rx_data.rx_big.extra_map);
2976 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
2978 kfree(ss->rx_data.rx_big.info, M_DEVBUF);
2979 ss->rx_data.rx_big.info = NULL;
2983 static void
2984 mxge_free_rings(mxge_softc_t *sc)
2986 int slice;
2988 if (sc->ss == NULL)
2989 return;
2991 for (slice = 0; slice < sc->num_slices; slice++)
2992 mxge_free_slice_rings(&sc->ss[slice]);
2995 static int
2996 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
2997 int tx_ring_entries)
2999 mxge_softc_t *sc = ss->sc;
3000 size_t bytes;
3001 int err, i;
3003 /*
3004 * Allocate per-slice receive resources
3005 */
3007 ss->rx_data.rx_small.mask = ss->rx_data.rx_big.mask =
3008 rx_ring_entries - 1;
3009 ss->rx_data.rx_done.mask = (2 * rx_ring_entries) - 1;
3011 /* Allocate the rx shadow rings */
3012 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.shadow);
3013 ss->rx_data.rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3015 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.shadow);
3016 ss->rx_data.rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3018 /* Allocate the rx host info rings */
3019 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_small.info);
3020 ss->rx_data.rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3022 bytes = rx_ring_entries * sizeof(*ss->rx_data.rx_big.info);
3023 ss->rx_data.rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3025 /* Allocate the rx busdma resources */
3026 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3027 1, /* alignment */
3028 4096, /* boundary */
3029 BUS_SPACE_MAXADDR, /* low */
3030 BUS_SPACE_MAXADDR, /* high */
3031 MHLEN, /* maxsize */
3032 1, /* num segs */
3033 MHLEN, /* maxsegsize */
3034 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3035 /* flags */
3036 &ss->rx_data.rx_small.dmat); /* tag */
3037 if (err != 0) {
3038 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3039 err);
3040 return err;
3043 err = bus_dmamap_create(ss->rx_data.rx_small.dmat, BUS_DMA_WAITOK,
3044 &ss->rx_data.rx_small.extra_map);
3045 if (err != 0) {
3046 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", err);
3047 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3048 ss->rx_data.rx_small.dmat = NULL;
3049 return err;
3051 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3052 err = bus_dmamap_create(ss->rx_data.rx_small.dmat,
3053 BUS_DMA_WAITOK, &ss->rx_data.rx_small.info[i].map);
3054 if (err != 0) {
3055 int j;
3057 device_printf(sc->dev, "Err %d rx_small dmamap\n", err);
3059 for (j = 0; j < i; ++j) {
3060 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3061 ss->rx_data.rx_small.info[j].map);
3063 bus_dmamap_destroy(ss->rx_data.rx_small.dmat,
3064 ss->rx_data.rx_small.extra_map);
3065 bus_dma_tag_destroy(ss->rx_data.rx_small.dmat);
3066 ss->rx_data.rx_small.dmat = NULL;
3067 return err;
3071 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3072 1, /* alignment */
3073 4096, /* boundary */
3074 BUS_SPACE_MAXADDR, /* low */
3075 BUS_SPACE_MAXADDR, /* high */
3076 4096, /* maxsize */
3077 1, /* num segs */
3078 4096, /* maxsegsize*/
3079 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
3080 /* flags */
3081 &ss->rx_data.rx_big.dmat); /* tag */
3082 if (err != 0) {
3083 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3084 err);
3085 return err;
3088 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3089 &ss->rx_data.rx_big.extra_map);
3090 if (err != 0) {
3091 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", err);
3092 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3093 ss->rx_data.rx_big.dmat = NULL;
3094 return err;
3096 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3097 err = bus_dmamap_create(ss->rx_data.rx_big.dmat, BUS_DMA_WAITOK,
3098 &ss->rx_data.rx_big.info[i].map);
3099 if (err != 0) {
3100 int j;
3102 device_printf(sc->dev, "Err %d rx_big dmamap\n", err);
3103 for (j = 0; j < i; ++j) {
3104 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3105 ss->rx_data.rx_big.info[j].map);
3107 bus_dmamap_destroy(ss->rx_data.rx_big.dmat,
3108 ss->rx_data.rx_big.extra_map);
3109 bus_dma_tag_destroy(ss->rx_data.rx_big.dmat);
3110 ss->rx_data.rx_big.dmat = NULL;
3111 return err;
3112 }
3113 }
3115 /*
3116 * Now allocate TX resources
3117 */
3119 ss->tx.mask = tx_ring_entries - 1;
3120 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3122 /*
3123 * Allocate the tx request copy block; MUST be at least 8 bytes
3124 * aligned
3125 */
3126 bytes = sizeof(*ss->tx.req_list) * (ss->tx.max_desc + 4);
3127 ss->tx.req_list = kmalloc(__VM_CACHELINE_ALIGN(bytes),
3128 M_DEVBUF,
3129 M_WAITOK | M_CACHEALIGN);
3131 /* Allocate the tx busdma segment list */
3132 bytes = sizeof(*ss->tx.seg_list) * ss->tx.max_desc;
3133 ss->tx.seg_list = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3135 /* Allocate the tx host info ring */
3136 bytes = tx_ring_entries * sizeof(*ss->tx.info);
3137 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3139 /* Allocate the tx busdma resources */
3140 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3141 1, /* alignment */
3142 sc->tx_boundary, /* boundary */
3143 BUS_SPACE_MAXADDR, /* low */
3144 BUS_SPACE_MAXADDR, /* high */
3145 IP_MAXPACKET +
3146 sizeof(struct ether_vlan_header),
3147 /* maxsize */
3148 ss->tx.max_desc - 2, /* num segs */
3149 sc->tx_boundary, /* maxsegsz */
3150 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW |
3151 BUS_DMA_ONEBPAGE, /* flags */
3152 &ss->tx.dmat); /* tag */
3153 if (err != 0) {
3154 device_printf(sc->dev, "Err %d allocating tx dmat\n", err);
3155 return err;
3156 }
3158 /*
3159 * Now use these tags to setup DMA maps for each slot in the ring
3160 */
3161 for (i = 0; i <= ss->tx.mask; i++) {
3162 err = bus_dmamap_create(ss->tx.dmat,
3163 BUS_DMA_WAITOK | BUS_DMA_ONEBPAGE, &ss->tx.info[i].map);
3164 if (err != 0) {
3165 int j;
3167 device_printf(sc->dev, "Err %d tx dmamap\n", err);
3168 for (j = 0; j < i; ++j) {
3169 bus_dmamap_destroy(ss->tx.dmat,
3170 ss->tx.info[j].map);
3172 bus_dma_tag_destroy(ss->tx.dmat);
3173 ss->tx.dmat = NULL;
3174 return err;
3177 return 0;
3180 static int
3181 mxge_alloc_rings(mxge_softc_t *sc)
3183 mxge_cmd_t cmd;
3184 int tx_ring_size;
3185 int tx_ring_entries, rx_ring_entries;
3186 int err, slice;
3188 /* Get ring sizes */
3189 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3190 if (err != 0) {
3191 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3192 return err;
3194 tx_ring_size = cmd.data0;
3196 tx_ring_entries = tx_ring_size / sizeof(mcp_kreq_ether_send_t);
3197 rx_ring_entries = sc->rx_intr_slots / 2;
3199 if (bootverbose) {
3200 device_printf(sc->dev, "tx desc %d, rx desc %d\n",
3201 tx_ring_entries, rx_ring_entries);
3204 sc->ifp->if_nmbclusters = rx_ring_entries * sc->num_slices;
3205 sc->ifp->if_nmbjclusters = sc->ifp->if_nmbclusters;
3207 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3208 ifq_set_ready(&sc->ifp->if_snd);
3209 ifq_set_subq_cnt(&sc->ifp->if_snd, sc->num_tx_rings);
3211 if (sc->num_tx_rings > 1) {
3212 sc->ifp->if_mapsubq = ifq_mapsubq_modulo;
3213 ifq_set_subq_divisor(&sc->ifp->if_snd, sc->num_tx_rings);
3216 for (slice = 0; slice < sc->num_slices; slice++) {
3217 err = mxge_alloc_slice_rings(&sc->ss[slice],
3218 rx_ring_entries, tx_ring_entries);
3219 if (err != 0) {
3220 device_printf(sc->dev,
3221 "alloc %d slice rings failed\n", slice);
3222 return err;
3225 return 0;
3228 static void
3229 mxge_choose_params(int mtu, int *cl_size)
3231 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3233 if (bufsize < MCLBYTES) {
3234 *cl_size = MCLBYTES;
3235 } else {
3236 KASSERT(bufsize < MJUMPAGESIZE, ("invalid MTU %d", mtu));
3237 *cl_size = MJUMPAGESIZE;
3241 static int
3242 mxge_slice_open(struct mxge_slice_state *ss, int cl_size)
3244 mxge_cmd_t cmd;
3245 int err, i, slice;
3247 slice = ss - ss->sc->ss;
3249 /*
3250 * Get the lanai pointers to the send and receive rings
3251 */
3252 err = 0;
3254 bzero(&cmd, sizeof(cmd)); /* silence gcc warning */
3255 if (ss->sc->num_tx_rings == 1) {
3256 if (slice == 0) {
3257 cmd.data0 = slice;
3258 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET,
3259 &cmd);
3260 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3261 (ss->sc->sram + cmd.data0);
3262 /* Leave send_go and send_stop as NULL */
3264 } else {
3265 cmd.data0 = slice;
3266 err = mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3267 ss->tx.lanai = (volatile mcp_kreq_ether_send_t *)
3268 (ss->sc->sram + cmd.data0);
3269 ss->tx.send_go = (volatile uint32_t *)
3270 (ss->sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3271 ss->tx.send_stop = (volatile uint32_t *)
3272 (ss->sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3275 cmd.data0 = slice;
3276 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3277 ss->rx_data.rx_small.lanai =
3278 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3280 cmd.data0 = slice;
3281 err |= mxge_send_cmd(ss->sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3282 ss->rx_data.rx_big.lanai =
3283 (volatile mcp_kreq_ether_recv_t *)(ss->sc->sram + cmd.data0);
3285 if (err != 0) {
3286 if_printf(ss->sc->ifp,
3287 "failed to get ring sizes or locations\n");
3288 return EIO;
3289 }
3291 /*
3292 * Stock small receive ring
3293 */
3294 for (i = 0; i <= ss->rx_data.rx_small.mask; i++) {
3295 err = mxge_get_buf_small(&ss->rx_data.rx_small,
3296 ss->rx_data.rx_small.info[i].map, i, TRUE);
3297 if (err) {
3298 if_printf(ss->sc->ifp, "alloced %d/%d smalls\n", i,
3299 ss->rx_data.rx_small.mask + 1);
3300 return ENOMEM;
3301 }
3302 }
3304 /*
3305 * Stock big receive ring
3306 */
3307 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3308 ss->rx_data.rx_big.shadow[i].addr_low = 0xffffffff;
3309 ss->rx_data.rx_big.shadow[i].addr_high = 0xffffffff;
3312 ss->rx_data.rx_big.cl_size = cl_size;
3314 for (i = 0; i <= ss->rx_data.rx_big.mask; i++) {
3315 err = mxge_get_buf_big(&ss->rx_data.rx_big,
3316 ss->rx_data.rx_big.info[i].map, i, TRUE);
3317 if (err) {
3318 if_printf(ss->sc->ifp, "alloced %d/%d bigs\n", i,
3319 ss->rx_data.rx_big.mask + 1);
3320 return ENOMEM;
3323 return 0;
3326 static int
3327 mxge_open(mxge_softc_t *sc)
3329 struct ifnet *ifp = sc->ifp;
3330 mxge_cmd_t cmd;
3331 int err, slice, cl_size, i;
3332 bus_addr_t bus;
3333 volatile uint8_t *itable;
3334 struct mxge_slice_state *ss;
3336 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3338 /* Copy the MAC address in case it was overridden */
3339 bcopy(IF_LLADDR(ifp), sc->mac_addr, ETHER_ADDR_LEN);
3341 err = mxge_reset(sc, 1);
3342 if (err != 0) {
3343 if_printf(ifp, "failed to reset\n");
3344 return EIO;
3347 if (sc->num_slices > 1) {
3348 /*
3349 * Setup the indirect table.
3350 */
3351 if_ringmap_rdrtable(sc->ring_map, sc->rdr_table, NETISR_CPUMAX);
3353 cmd.data0 = NETISR_CPUMAX;
3354 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, &cmd);
3356 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, &cmd);
3357 if (err != 0) {
3358 if_printf(ifp, "failed to setup rss tables\n");
3359 return err;
3362 itable = sc->sram + cmd.data0;
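/*
 * The indirection table in lanai SRAM maps packet hash values
 * to slice numbers; mirror the host ring map into it.
 */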
3363 for (i = 0; i < NETISR_CPUMAX; i++)
3364 itable[i] = sc->rdr_table[i];
3366 if (sc->use_rss) {
3367 volatile uint8_t *hwkey;
3368 uint8_t swkey[MXGE_HWRSS_KEYLEN];
3370 /*
3371 * Setup Toeplitz key.
3372 */
3373 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_KEY_OFFSET,
3374 &cmd);
3375 if (err != 0) {
3376 if_printf(ifp, "failed to get rsskey\n");
3377 return err;
3379 hwkey = sc->sram + cmd.data0;
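/*
 * Copy the host Toeplitz key into lanai SRAM so the NIC and the
 * host network stack compute identical RSS hash values.
 */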
3381 toeplitz_get_key(swkey, MXGE_HWRSS_KEYLEN);
3382 for (i = 0; i < MXGE_HWRSS_KEYLEN; ++i)
3383 hwkey[i] = swkey[i];
3384 wmb();
3386 err = mxge_send_cmd(sc, MXGEFW_CMD_RSS_KEY_UPDATED,
3387 &cmd);
3388 if (err != 0) {
3389 if_printf(ifp, "failed to update rsskey\n");
3390 return err;
3392 if (bootverbose)
3393 if_printf(ifp, "RSS key updated\n");
3396 cmd.data0 = 1;
3397 if (sc->use_rss) {
3398 if (bootverbose)
3399 if_printf(ifp, "input hash: RSS\n");
3400 cmd.data1 = MXGEFW_RSS_HASH_TYPE_IPV4 |
3401 MXGEFW_RSS_HASH_TYPE_TCP_IPV4;
3402 } else {
3403 if (bootverbose)
3404 if_printf(ifp, "input hash: SRC_DST_PORT\n");
3405 cmd.data1 = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
3407 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3408 if (err != 0) {
3409 if_printf(ifp, "failed to enable slices\n");
3410 return err;
3414 cmd.data0 = MXGEFW_TSO_MODE_NDIS;
3415 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_TSO_MODE, &cmd);
3416 if (err) {
3417 /*
3418 * Can't change TSO mode to NDIS, never allow TSO then
3419 */
3420 if_printf(ifp, "failed to set TSO mode\n");
3421 ifp->if_capenable &= ~IFCAP_TSO;
3422 ifp->if_capabilities &= ~IFCAP_TSO;
3423 ifp->if_hwassist &= ~CSUM_TSO;
3426 mxge_choose_params(ifp->if_mtu, &cl_size);
3428 cmd.data0 = 1;
3429 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, &cmd);
3430 /*
3431 * Error is only meaningful if we're trying to set
3432 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1
3433 */
3435 /*
3436 * Give the firmware the mtu and the big and small buffer
3437 * sizes. The firmware wants the big buf size to be a power
3438 * of two. Luckily, DragonFly's clusters are powers of two.
3439 */
3440 cmd.data0 = ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3441 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3443 cmd.data0 = MXGE_RX_SMALL_BUFLEN;
3444 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, &cmd);
3446 cmd.data0 = cl_size;
3447 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3449 if (err != 0) {
3450 if_printf(ifp, "failed to setup params\n");
3451 goto abort;
3454 /* Now give him the pointer to the stats block */
3455 for (slice = 0; slice < sc->num_slices; slice++) {
3456 ss = &sc->ss[slice];
3457 cmd.data0 = MXGE_LOWPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3458 cmd.data1 = MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.dmem_busaddr);
3459 cmd.data2 = sizeof(struct mcp_irq_data);
3460 cmd.data2 |= (slice << 16);
3461 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3464 if (err != 0) {
3465 bus = sc->ss->fw_stats_dma.dmem_busaddr;
3466 bus += offsetof(struct mcp_irq_data, send_done_count);
3467 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3468 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3469 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3470 &cmd);
3472 /* Firmware cannot support multicast without STATS_DMA_V2 */
3473 sc->fw_multicast_support = 0;
3474 } else {
3475 sc->fw_multicast_support = 1;
3478 if (err != 0) {
3479 if_printf(ifp, "failed to setup params\n");
3480 goto abort;
3483 for (slice = 0; slice < sc->num_slices; slice++) {
3484 err = mxge_slice_open(&sc->ss[slice], cl_size);
3485 if (err != 0) {
3486 if_printf(ifp, "couldn't open slice %d\n", slice);
3487 goto abort;
3491 /* Finally, start the firmware running */
3492 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3493 if (err) {
3494 if_printf(ifp, "Couldn't bring up link\n");
3495 goto abort;
3498 ifp->if_flags |= IFF_RUNNING;
3499 for (i = 0; i < sc->num_tx_rings; ++i) {
3500 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3502 ifsq_clr_oactive(tx->ifsq);
3503 ifsq_watchdog_start(&tx->watchdog);
3506 return 0;
3508 abort:
3509 mxge_free_mbufs(sc);
3510 return err;
3513 static void
3514 mxge_close(mxge_softc_t *sc, int down)
3516 struct ifnet *ifp = sc->ifp;
3517 mxge_cmd_t cmd;
3518 int err, old_down_cnt, i;
3520 ASSERT_IFNET_SERIALIZED_ALL(ifp);
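/*
 * The firmware acknowledges MXGEFW_CMD_ETHERNET_DOWN with a
 * link-down interrupt that bumps sc->down_cnt (see
 * mxge_intr_status()); on a graceful close we snapshot the
 * counter so we can tell when that interrupt has arrived.
 */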
3522 if (!down) {
3523 old_down_cnt = sc->down_cnt;
3524 wmb();
3526 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3527 if (err)
3528 if_printf(ifp, "Couldn't bring down link\n");
3530 if (old_down_cnt == sc->down_cnt) {
3531 /*
3532 * Wait for down irq
3533 * XXX racy
3534 */
3535 ifnet_deserialize_all(ifp);
3536 DELAY(10 * sc->intr_coal_delay);
3537 ifnet_serialize_all(ifp);
3540 wmb();
3541 if (old_down_cnt == sc->down_cnt)
3542 if_printf(ifp, "never got down irq\n");
3544 mxge_free_mbufs(sc);
3546 ifp->if_flags &= ~IFF_RUNNING;
3547 for (i = 0; i < sc->num_tx_rings; ++i) {
3548 mxge_tx_ring_t *tx = &sc->ss[i].tx;
3550 ifsq_clr_oactive(tx->ifsq);
3551 ifsq_watchdog_stop(&tx->watchdog);
3555 static void
3556 mxge_setup_cfg_space(mxge_softc_t *sc)
3558 device_t dev = sc->dev;
3559 int reg;
3560 uint16_t lnk, pectl;
3562 /* Find the PCIe link width and set max read request to 4KB */
3563 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3564 lnk = pci_read_config(dev, reg + 0x12, 2);
3565 sc->link_width = (lnk >> 4) & 0x3f;
3567 if (sc->pectl == 0) {
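/*
 * Bits 14:12 of the PCIe Device Control register hold the
 * Max_Read_Request_Size field; the value 5 selects 4096 bytes.
 */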
3568 pectl = pci_read_config(dev, reg + 0x8, 2);
3569 pectl = (pectl & ~0x7000) | (5 << 12);
3570 pci_write_config(dev, reg + 0x8, pectl, 2);
3571 sc->pectl = pectl;
3572 } else {
3573 /* Restore saved pectl after watchdog reset */
3574 pci_write_config(dev, reg + 0x8, sc->pectl, 2);
3578 /* Enable DMA and memory space access */
3579 pci_enable_busmaster(dev);
3582 static uint32_t
3583 mxge_read_reboot(mxge_softc_t *sc)
3585 device_t dev = sc->dev;
3586 uint32_t vs;
3588 /* Find the vendor specific offset */
3589 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3590 if_printf(sc->ifp, "could not find vendor specific offset\n");
3591 return (uint32_t)-1;
3593 /* Enable read32 mode */
3594 pci_write_config(dev, vs + 0x10, 0x3, 1);
3595 /* Tell NIC which register to read */
3596 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3597 return pci_read_config(dev, vs + 0x14, 4);
3600 static void
3601 mxge_watchdog_reset(mxge_softc_t *sc)
3603 struct pci_devinfo *dinfo;
3604 int err, running;
3605 uint32_t reboot;
3606 uint16_t cmd;
3608 err = ENXIO;
3610 if_printf(sc->ifp, "Watchdog reset!\n");
3612 /*
3613 * Check to see if the NIC rebooted. If it did, then all of
3614 * PCI config space has been reset, and things like the
3615 * busmaster bit will be zero. If this is the case, then we
3616 * must restore PCI config space before the NIC can be used
3617 * again.
3618 */
3619 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3620 if (cmd == 0xffff) {
3621 /*
3622 * Maybe the watchdog caught the NIC rebooting; wait
3623 * up to 100ms for it to finish. If it does not come
3624 * back, then give up.
3625 */
3626 DELAY(1000*100);
3627 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3628 if (cmd == 0xffff)
3629 if_printf(sc->ifp, "NIC disappeared!\n");
3631 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3632 /* Print the reboot status */
3633 reboot = mxge_read_reboot(sc);
3634 if_printf(sc->ifp, "NIC rebooted, status = 0x%x\n", reboot);
3636 running = sc->ifp->if_flags & IFF_RUNNING;
3637 if (running) {
3638 /*
3639 * Quiesce NIC so that TX routines will not try to
3640 * xmit after restoration of BAR
3641 */
3643 /* Mark the link as down */
3644 if (sc->link_state) {
3645 sc->ifp->if_link_state = LINK_STATE_DOWN;
3646 if_link_state_change(sc->ifp);
3648 mxge_close(sc, 1);
3650 /* Restore PCI configuration space */
3651 dinfo = device_get_ivars(sc->dev);
3652 pci_cfg_restore(sc->dev, dinfo);
3654 /* And redo any changes we made to our config space */
3655 mxge_setup_cfg_space(sc);
3657 /* Reload f/w */
3658 err = mxge_load_firmware(sc, 0);
3659 if (err)
3660 if_printf(sc->ifp, "Unable to re-load f/w\n");
3661 if (running && !err) {
3662 int i;
3664 err = mxge_open(sc);
3666 for (i = 0; i < sc->num_tx_rings; ++i)
3667 ifsq_devstart_sched(sc->ss[i].tx.ifsq);
3669 sc->watchdog_resets++;
3670 } else {
3671 if_printf(sc->ifp, "NIC did not reboot, not resetting\n");
3672 err = 0;
3674 if (err) {
3675 if_printf(sc->ifp, "watchdog reset failed\n");
3676 } else {
3677 if (sc->dying == 2)
3678 sc->dying = 0;
3679 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3683 static void
3684 mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
3686 if_printf(sc->ifp, "slice %d stuck? ring state:\n", slice);
3687 if_printf(sc->ifp, "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3688 tx->req, tx->done, tx->queue_active);
3689 if_printf(sc->ifp, "tx.activate=%d tx.deactivate=%d\n",
3690 tx->activate, tx->deactivate);
3691 if_printf(sc->ifp, "pkt_done=%d fw=%d\n",
3692 tx->pkt_done, be32toh(sc->ss->fw_stats->send_done_count));
3695 static u_long
3696 mxge_update_stats(mxge_softc_t *sc)
3698 u_long ipackets, opackets, pkts;
3700 IFNET_STAT_GET(sc->ifp, ipackets, ipackets);
3701 IFNET_STAT_GET(sc->ifp, opackets, opackets);
3703 pkts = ipackets - sc->ipackets;
3704 pkts += opackets - sc->opackets;
3706 sc->ipackets = ipackets;
3707 sc->opackets = opackets;
3709 return pkts;
3712 static void
3713 mxge_tick(void *arg)
3715 mxge_softc_t *sc = arg;
3716 u_long pkts = 0;
3717 int err = 0;
3718 int ticks;
3720 lwkt_serialize_enter(&sc->main_serialize);
3722 ticks = mxge_ticks;
3723 if (sc->ifp->if_flags & IFF_RUNNING) {
3724 /* Aggregate stats from different slices */
3725 pkts = mxge_update_stats(sc);
3726 if (sc->need_media_probe)
3727 mxge_media_probe(sc);
3729 if (pkts == 0) {
3730 uint16_t cmd;
3732 /* Ensure NIC did not suffer h/w fault while idle */
3733 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3734 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3735 sc->dying = 2;
3736 mxge_serialize_skipmain(sc);
3737 mxge_watchdog_reset(sc);
3738 mxge_deserialize_skipmain(sc);
3739 err = ENXIO;
3742 /* Look less often if NIC is idle */
3743 ticks *= 4;
3746 if (err == 0)
3747 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);
3749 lwkt_serialize_exit(&sc->main_serialize);
3752 static int
3753 mxge_media_change(struct ifnet *ifp)
3755 mxge_softc_t *sc = ifp->if_softc;
3756 const struct ifmedia *ifm = &sc->media;
3757 int pause;
3759 if (IFM_OPTIONS(ifm->ifm_media) & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
3760 if (sc->pause)
3761 return 0;
3762 pause = 1;
3763 } else {
3764 if (!sc->pause)
3765 return 0;
3766 pause = 0;
3768 return mxge_change_pause(sc, pause);
3771 static int
3772 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3774 struct ifnet *ifp = sc->ifp;
3775 int real_mtu, old_mtu;
3776 int err = 0;
3778 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
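/* Reject MTUs beyond the NIC's limit or below the 60-byte minimum frame */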
3779 if (mtu > sc->max_mtu || real_mtu < 60)
3780 return EINVAL;
3782 old_mtu = ifp->if_mtu;
3783 ifp->if_mtu = mtu;
3784 if (ifp->if_flags & IFF_RUNNING) {
3785 mxge_close(sc, 0);
3786 err = mxge_open(sc);
3787 if (err != 0) {
3788 ifp->if_mtu = old_mtu;
3789 mxge_close(sc, 0);
3790 mxge_open(sc);
3793 return err;
3796 static void
3797 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3799 mxge_softc_t *sc = ifp->if_softc;
3801 ifmr->ifm_status = IFM_AVALID;
3802 ifmr->ifm_active = IFM_ETHER;
3804 if (sc->link_state)
3805 ifmr->ifm_status |= IFM_ACTIVE;
3808 * Autoselect is not supported, so the current media
3809 * should be delivered.
3811 ifmr->ifm_active |= sc->current_media;
3812 if (sc->current_media != IFM_NONE) {
3813 ifmr->ifm_active |= MXGE_IFM;
3814 if (sc->pause)
3815 ifmr->ifm_active |= IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE;
3819 static int
3820 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data,
3821 struct ucred *cr __unused)
3823 mxge_softc_t *sc = ifp->if_softc;
3824 struct ifreq *ifr = (struct ifreq *)data;
3825 int err, mask;
3827 ASSERT_IFNET_SERIALIZED_ALL(ifp);
3828 err = 0;
3830 switch (command) {
3831 case SIOCSIFMTU:
3832 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3833 break;
3835 case SIOCSIFFLAGS:
3836 if (sc->dying)
3837 return EINVAL;
3839 if (ifp->if_flags & IFF_UP) {
3840 if (!(ifp->if_flags & IFF_RUNNING)) {
3841 err = mxge_open(sc);
3842 } else {
3844 * Take care of PROMISC and ALLMULTI
3845 * flag changes
3847 mxge_change_promisc(sc,
3848 ifp->if_flags & IFF_PROMISC);
3849 mxge_set_multicast_list(sc);
3851 } else {
3852 if (ifp->if_flags & IFF_RUNNING)
3853 mxge_close(sc, 0);
3855 break;
3857 case SIOCADDMULTI:
3858 case SIOCDELMULTI:
3859 mxge_set_multicast_list(sc);
3860 break;
3862 case SIOCSIFCAP:
3863 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3864 if (mask & IFCAP_TXCSUM) {
3865 ifp->if_capenable ^= IFCAP_TXCSUM;
3866 if (ifp->if_capenable & IFCAP_TXCSUM)
3867 ifp->if_hwassist |= CSUM_TCP | CSUM_UDP;
3868 else
3869 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
3871 if (mask & IFCAP_TSO) {
3872 ifp->if_capenable ^= IFCAP_TSO;
3873 if (ifp->if_capenable & IFCAP_TSO)
3874 ifp->if_hwassist |= CSUM_TSO;
3875 else
3876 ifp->if_hwassist &= ~CSUM_TSO;
3878 if (mask & IFCAP_RXCSUM)
3879 ifp->if_capenable ^= IFCAP_RXCSUM;
3880 if (mask & IFCAP_VLAN_HWTAGGING)
3881 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3882 break;
3884 case SIOCGIFMEDIA:
3885 case SIOCSIFMEDIA:
3886 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3887 &sc->media, command);
3888 break;
3890 default:
3891 err = ether_ioctl(ifp, command, data);
3892 break;
3894 return err;
3897 static void
3898 mxge_fetch_tunables(mxge_softc_t *sc)
3900 int ifm;
3902 sc->intr_coal_delay = mxge_intr_coal_delay;
3903 if (sc->intr_coal_delay < 0 || sc->intr_coal_delay > (10 * 1000))
3904 sc->intr_coal_delay = MXGE_INTR_COAL_DELAY;
3906 /* XXX */
3907 if (mxge_ticks == 0)
3908 mxge_ticks = hz / 2;
3910 ifm = ifmedia_str2ethfc(mxge_flowctrl);
3911 if (ifm & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE))
3912 sc->pause = 1;
3914 sc->use_rss = mxge_use_rss;
3916 sc->throttle = mxge_throttle;
3917 if (sc->throttle && sc->throttle > MXGE_MAX_THROTTLE)
3918 sc->throttle = MXGE_MAX_THROTTLE;
3919 if (sc->throttle && sc->throttle < MXGE_MIN_THROTTLE)
3920 sc->throttle = MXGE_MIN_THROTTLE;
3923 static void
3924 mxge_free_slices(mxge_softc_t *sc)
3926 struct mxge_slice_state *ss;
3927 int i;
3929 if (sc->ss == NULL)
3930 return;
3932 for (i = 0; i < sc->num_slices; i++) {
3933 ss = &sc->ss[i];
3934 if (ss->fw_stats != NULL) {
3935 mxge_dma_free(&ss->fw_stats_dma);
3936 ss->fw_stats = NULL;
3938 if (ss->rx_data.rx_done.entry != NULL) {
3939 mxge_dma_free(&ss->rx_done_dma);
3940 ss->rx_data.rx_done.entry = NULL;
3943 kfree(sc->ss, M_DEVBUF);
3944 sc->ss = NULL;
3947 static int
3948 mxge_alloc_slices(mxge_softc_t *sc)
3950 mxge_cmd_t cmd;
3951 struct mxge_slice_state *ss;
3952 size_t bytes;
3953 int err, i, rx_ring_size;
3955 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
3956 if (err != 0) {
3957 device_printf(sc->dev, "Cannot determine rx ring size\n");
3958 return err;
3960 rx_ring_size = cmd.data0;
3961 sc->rx_intr_slots = 2 * (rx_ring_size / sizeof (mcp_dma_addr_t));
3963 bytes = sizeof(*sc->ss) * sc->num_slices;
3964 sc->ss = kmalloc(bytes, M_DEVBUF,
3965 M_WAITOK | M_ZERO | M_CACHEALIGN);
3967 for (i = 0; i < sc->num_slices; i++) {
3968 ss = &sc->ss[i];
3970 ss->sc = sc;
3972 lwkt_serialize_init(&ss->rx_data.rx_serialize);
3973 lwkt_serialize_init(&ss->tx.tx_serialize);
3974 ss->intr_rid = -1;
3976 /*
3977 * Allocate per-slice rx interrupt queue
3978 * XXX assume 4-byte mcp_slot
3979 */
3980 bytes = sc->rx_intr_slots * sizeof(mcp_slot_t);
3981 err = mxge_dma_alloc(sc, &ss->rx_done_dma, bytes, 4096);
3982 if (err != 0) {
3983 device_printf(sc->dev,
3984 "alloc %d slice rx_done failed\n", i);
3985 return err;
3987 ss->rx_data.rx_done.entry = ss->rx_done_dma.dmem_addr;
3989 /*
3990 * Allocate the per-slice firmware stats
3991 */
3992 bytes = sizeof(*ss->fw_stats);
3993 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
3994 sizeof(*ss->fw_stats), 64);
3995 if (err != 0) {
3996 device_printf(sc->dev,
3997 "alloc %d fw_stats failed\n", i);
3998 return err;
4000 ss->fw_stats = ss->fw_stats_dma.dmem_addr;
4002 return 0;
4005 static void
4006 mxge_slice_probe(mxge_softc_t *sc)
4008 int status, max_intr_slots, max_slices, num_slices;
4009 int msix_cnt, msix_enable, multi_tx;
4010 mxge_cmd_t cmd;
4011 const char *old_fw;
4013 sc->num_slices = 1;
4014 sc->num_tx_rings = 1;
4016 num_slices = device_getenv_int(sc->dev, "num_slices", mxge_num_slices);
4017 if (num_slices == 1)
4018 return;
4020 if (netisr_ncpus == 1)
4021 return;
4023 msix_enable = device_getenv_int(sc->dev, "msix.enable",
4024 mxge_msix_enable);
4025 if (!msix_enable)
4026 return;
4028 msix_cnt = pci_msix_count(sc->dev);
4029 if (msix_cnt < 2)
4030 return;
4031 if (bootverbose)
4032 device_printf(sc->dev, "MSI-X count %d\n", msix_cnt);
4034 /*
4035 * Now load the slice aware firmware to see what it supports
4036 */
4037 old_fw = sc->fw_name;
4038 if (old_fw == mxge_fw_aligned)
4039 sc->fw_name = mxge_fw_rss_aligned;
4040 else
4041 sc->fw_name = mxge_fw_rss_unaligned;
4042 status = mxge_load_firmware(sc, 0);
4043 if (status != 0) {
4044 device_printf(sc->dev, "Falling back to a single slice\n");
4045 return;
4046 }
4048 /*
4049 * Try to send a reset command to the card to see if it is alive
4050 */
4051 memset(&cmd, 0, sizeof(cmd));
4052 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4053 if (status != 0) {
4054 device_printf(sc->dev, "failed reset\n");
4055 goto abort_with_fw;
4056 }
4058 /*
4059 * Get rx ring size to calculate rx interrupt queue size
4060 */
4061 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4062 if (status != 0) {
4063 device_printf(sc->dev, "Cannot determine rx ring size\n");
4064 goto abort_with_fw;
4066 max_intr_slots = 2 * (cmd.data0 / sizeof(mcp_dma_addr_t));
4068 /*
4069 * Tell it the size of the rx interrupt queue
4070 */
4071 cmd.data0 = max_intr_slots * sizeof(struct mcp_slot);
4072 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4073 if (status != 0) {
4074 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4075 goto abort_with_fw;
4076 }
4078 /*
4079 * Ask for the maximum number of slices it supports
4080 */
4081 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4082 if (status != 0) {
4083 device_printf(sc->dev,
4084 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4085 goto abort_with_fw;
4087 max_slices = cmd.data0;
4088 if (bootverbose)
4089 device_printf(sc->dev, "max slices %d\n", max_slices);
4091 if (max_slices > msix_cnt)
4092 max_slices = msix_cnt;
4094 sc->ring_map = if_ringmap_alloc(sc->dev, num_slices, max_slices);
4095 sc->num_slices = if_ringmap_count(sc->ring_map);
4097 multi_tx = device_getenv_int(sc->dev, "multi_tx", mxge_multi_tx);
4098 if (multi_tx)
4099 sc->num_tx_rings = sc->num_slices;
4101 if (bootverbose) {
4102 device_printf(sc->dev, "using %d slices, max %d\n",
4103 sc->num_slices, max_slices);
4106 if (sc->num_slices == 1)
4107 goto abort_with_fw;
4108 return;
4110 abort_with_fw:
4111 sc->fw_name = old_fw;
4112 mxge_load_firmware(sc, 0);
4115 static void
4116 mxge_setup_serialize(struct mxge_softc *sc)
4118 int i = 0, slice;
4120 /* Main + rx + tx */
4121 sc->nserialize = (2 * sc->num_slices) + 1;
4122 sc->serializes =
4123 kmalloc(sc->nserialize * sizeof(struct lwkt_serialize *),
4124 M_DEVBUF, M_WAITOK | M_ZERO);
4126 /*
4127 * Setup serializes
4128 *
4129 * NOTE: Order is critical
4130 */
4132 KKASSERT(i < sc->nserialize);
4133 sc->serializes[i++] = &sc->main_serialize;
4135 for (slice = 0; slice < sc->num_slices; ++slice) {
4136 KKASSERT(i < sc->nserialize);
4137 sc->serializes[i++] = &sc->ss[slice].rx_data.rx_serialize;
4140 for (slice = 0; slice < sc->num_slices; ++slice) {
4141 KKASSERT(i < sc->nserialize);
4142 sc->serializes[i++] = &sc->ss[slice].tx.tx_serialize;
4145 KKASSERT(i == sc->nserialize);
4148 static void
4149 mxge_serialize(struct ifnet *ifp, enum ifnet_serialize slz)
4151 struct mxge_softc *sc = ifp->if_softc;
4153 ifnet_serialize_array_enter(sc->serializes, sc->nserialize, slz);
4156 static void
4157 mxge_deserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4159 struct mxge_softc *sc = ifp->if_softc;
4161 ifnet_serialize_array_exit(sc->serializes, sc->nserialize, slz);
4164 static int
4165 mxge_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz)
4167 struct mxge_softc *sc = ifp->if_softc;
4169 return ifnet_serialize_array_try(sc->serializes, sc->nserialize, slz);
4172 #ifdef INVARIANTS
4174 static void
4175 mxge_serialize_assert(struct ifnet *ifp, enum ifnet_serialize slz,
4176 boolean_t serialized)
4178 struct mxge_softc *sc = ifp->if_softc;
4180 ifnet_serialize_array_assert(sc->serializes, sc->nserialize,
4181 slz, serialized);
4184 #endif /* INVARIANTS */
#ifdef IFPOLL_ENABLE

static void
mxge_npoll_rx(struct ifnet *ifp, void *xss, int cycle)
{
	struct mxge_slice_state *ss = xss;
	mxge_rx_done_t *rx_done = &ss->rx_data.rx_done;

	ASSERT_SERIALIZED(&ss->rx_data.rx_serialize);

	if (rx_done->entry[rx_done->idx].length != 0) {
		mxge_clean_rx_done(&ss->sc->arpcom.ac_if, &ss->rx_data, cycle);
	} else {
		/*
		 * XXX
		 * This register write obviously has a cost;
		 * however, if we don't hand back the rx token,
		 * the upcoming packets may suffer ridiculously
		 * large delay, as observed on 8AL-C using ping(8).
		 */
		*ss->irq_claim = be32toh(3);
	}
}

static void
mxge_npoll(struct ifnet *ifp, struct ifpoll_info *info)
{
	struct mxge_softc *sc = ifp->if_softc;
	int i;

	if (info == NULL)
		return;

	/*
	 * Only poll rx; polling tx and status don't seem to work
	 */
	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int cpu = ss->intr_cpuid;

		KKASSERT(cpu < netisr_ncpus);
		info->ifpi_rx[cpu].poll_func = mxge_npoll_rx;
		info->ifpi_rx[cpu].arg = ss;
		info->ifpi_rx[cpu].serializer = &ss->rx_data.rx_serialize;
	}
}

#endif	/* IFPOLL_ENABLE */

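/*
 * Device attach.  The ordering matters: the board must be mapped and
 * its firmware selected before the slices can be probed, and the
 * slices must be sized before interrupts, serializers and rings are
 * allocated.  Every failure funnels through the failed: label, which
 * calls mxge_detach(); detach therefore has to cope with a partially
 * initialized softc.
 */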
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid, i;

	/*
	 * Avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/* IFM_ETH_FORCEPAUSE can't be changed */
	ifmedia_init(&sc->media, IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE,
	    mxge_media_change, mxge_media_status);

	lwkt_serialize_init(&sc->main_serialize);

	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,	/* parent */
	    1,				/* alignment */
	    0,				/* boundary */
	    BUS_SPACE_MAXADDR,		/* low */
	    BUS_SPACE_MAXADDR,		/* high */
	    BUS_SPACE_MAXSIZE_32BIT,	/* maxsize */
	    0,				/* num segs */
	    BUS_SPACE_MAXSIZE_32BIT,	/* maxsegsize */
	    0,				/* flags */
	    &sc->parent_dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "Err %d allocating parent dmat\n", err);
		goto failed;
	}

	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/*
	 * Map the board into the kernel
	 */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto failed;
	}

	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
		    rman_get_size(sc->mem_res));
		err = ENXIO;
		goto failed;
	}

	/*
	 * Make NULL terminated copy of the EEPROM strings section of
	 * lanai SRAM
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
	    rman_get_bushandle(sc->mem_res),
	    sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
	    sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0) {
		device_printf(dev, "parse EEPROM string failed\n");
		goto failed;
	}

	/*
	 * Enable write combining for efficient use of PCIe bus
	 */
	mxge_enable_wc(sc);

	/*
	 * Allocate the out of band DMA memory
	 */
	err = mxge_dma_alloc(sc, &sc->cmd_dma, sizeof(mxge_cmd_t), 64);
	if (err != 0) {
		device_printf(dev, "alloc cmd DMA buf failed\n");
		goto failed;
	}
	sc->cmd = sc->cmd_dma.dmem_addr;

	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0) {
		device_printf(dev, "alloc zeropad DMA buf failed\n");
		goto failed;
	}

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0) {
		device_printf(dev, "alloc dmabench DMA buf failed\n");
		goto failed;
	}

	/* Select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0) {
		device_printf(dev, "select firmware failed\n");
		goto failed;
	}

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0) {
		device_printf(dev, "alloc slices failed\n");
		goto failed;
	}

	err = mxge_alloc_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc intr failed\n");
		goto failed;
	}

	/* Setup serializes */
	mxge_setup_serialize(sc);

	err = mxge_reset(sc, 0);
	if (err != 0) {
		device_printf(dev, "reset failed\n");
		goto failed;
	}

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(dev, "failed to allocate rings\n");
		goto failed;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO;
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;

	ifp->if_capabilities |= IFCAP_VLAN_MTU;
#if 0
	/* Well, it's software, sigh */
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
#endif
	ifp->if_capenable = ifp->if_capabilities;

	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = mxge_init;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
#ifdef IFPOLL_ENABLE
	if (sc->intr_type != PCI_INTR_TYPE_LEGACY)
		ifp->if_npoll = mxge_npoll;
#endif
	ifp->if_serialize = mxge_serialize;
	ifp->if_deserialize = mxge_deserialize;
	ifp->if_tryserialize = mxge_tryserialize;
#ifdef INVARIANTS
	ifp->if_serialize_assert = mxge_serialize_assert;
#endif

	/* Increase TSO burst length */
	ifp->if_tsolen = (32 * ETHERMTU);

	/* Initialise the ifmedia structure */
	mxge_media_init(sc);
	mxge_media_probe(sc);

	ether_ifattach(ifp, sc->mac_addr, NULL);

	/* Setup TX rings and subqueues */
	for (i = 0; i < sc->num_tx_rings; ++i) {
		struct ifaltq_subque *ifsq = ifq_get_subq(&ifp->if_snd, i);
		struct mxge_slice_state *ss = &sc->ss[i];

		ifsq_set_cpuid(ifsq, ss->intr_cpuid);
		ifsq_set_hw_serialize(ifsq, &ss->tx.tx_serialize);
		ifsq_set_priv(ifsq, &ss->tx);
		ss->tx.ifsq = ifsq;

		ifsq_watchdog_init(&ss->tx.watchdog, ifsq, mxge_watchdog, 0);
	}

	/*
	 * XXX
	 * We are not ready to do "gather" jumbo frame, so
	 * limit MTU to MJUMPAGESIZE
	 */
	sc->max_mtu = MJUMPAGESIZE -
	    ETHER_HDR_LEN - EVL_ENCAPLEN - MXGEFW_PAD - 1;
	sc->dying = 0;

	err = mxge_setup_intr(sc);
	if (err != 0) {
		device_printf(dev, "alloc and setup intr failed\n");
		ether_ifdetach(ifp);
		goto failed;
	}

	mxge_add_sysctls(sc);

	/* Increase non-cluster mbuf limit; used by small RX rings */
	mb_inclimit(ifp->if_nmbclusters);

	callout_reset_bycpu(&sc->co_hdl, mxge_ticks, mxge_tick, sc,
	    sc->ss[0].intr_cpuid);
	return 0;

failed:
	mxge_detach(dev);
	return err;
}

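/*
 * Detach runs attach in reverse: quiesce the interface under full
 * serialization, stop the tick callout, detach the ifnet, then free
 * interrupts, sysctls, rings, slices, DMA buffers and bus resources.
 * All releases are NULL-guarded, so this also serves as the error
 * path for a failed attach.
 */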
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (device_is_attached(dev)) {
		struct ifnet *ifp = sc->ifp;
		int mblimit = ifp->if_nmbclusters;

		ifnet_serialize_all(ifp);

		sc->dying = 1;
		if (ifp->if_flags & IFF_RUNNING)
			mxge_close(sc, 1);
		callout_stop(&sc->co_hdl);

		mxge_teardown_intr(sc, sc->num_slices);

		ifnet_deserialize_all(ifp);

		callout_terminate(&sc->co_hdl);

		ether_ifdetach(ifp);

		/* Decrease non-cluster mbuf limit increased by us */
		mb_inclimit(-mblimit);
	}

	ifmedia_removeall(&sc->media);

	if (sc->cmd != NULL && sc->zeropad_dma.dmem_addr != NULL &&
	    sc->sram != NULL)
		mxge_dummy_rdma(sc, 0);

	mxge_free_intr(sc);
	mxge_rem_sysctls(sc);
	mxge_free_rings(sc);

	/* MUST be done after sysctls, intr and rings are freed */
	mxge_free_slices(sc);

	if (sc->dmabench_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->dmabench_dma);
	if (sc->zeropad_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->zeropad_dma);
	if (sc->cmd_dma.dmem_addr != NULL)
		mxge_dma_free(&sc->cmd_dma);

	if (sc->msix_table_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(2),
		    sc->msix_table_res);
	}
	if (sc->mem_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS,
		    sc->mem_res);
	}

	if (sc->parent_dmat != NULL)
		bus_dma_tag_destroy(sc->parent_dmat);

	if (sc->ring_map != NULL)
		if_ringmap_free(sc->ring_map);

	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

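/*
 * Undo mxge_alloc_msix().  'setup' says whether pci_setup_msix()
 * succeeded and needs to be torn down as well; only meaningful in
 * multi-slice (MSI-X) mode.
 */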
static void
mxge_free_msix(struct mxge_softc *sc, boolean_t setup)
{
	int i;

	KKASSERT(sc->num_slices > 1);

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (ss->intr_rid >= 0)
			pci_release_msix_vector(sc->dev, ss->intr_rid);
	}
	if (setup)
		pci_teardown_msix(sc->dev);
}

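/*
 * Multi-slice interrupt allocation.  Slice 0 gets a combined vector
 * ("comb") handled by mxge_msi(); the remaining slices get RX-only
 * vectors, or RX/TX vectors when multiple TX rings are enabled.
 * Each vector is bound to the CPU assigned by the ring map.
 */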
static int
mxge_alloc_msix(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	int rid, error, i;
	boolean_t setup = FALSE;

	KKASSERT(sc->num_slices > 1);

	ss = &sc->ss[0];

	ss->intr_serialize = &sc->main_serialize;
	ss->intr_func = mxge_msi;
	ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
	    "%s comb", device_get_nameunit(sc->dev));
	ss->intr_desc = ss->intr_desc0;
	ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, 0);

	for (i = 1; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		ss->intr_serialize = &ss->rx_data.rx_serialize;
		if (sc->num_tx_rings == 1) {
			ss->intr_func = mxge_msix_rx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rx%d", device_get_nameunit(sc->dev), i);
		} else {
			ss->intr_func = mxge_msix_rxtx;
			ksnprintf(ss->intr_desc0, sizeof(ss->intr_desc0),
			    "%s rxtx%d", device_get_nameunit(sc->dev), i);
		}
		ss->intr_desc = ss->intr_desc0;
		ss->intr_cpuid = if_ringmap_cpumap(sc->ring_map, i);
	}

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSI-X table res\n");
		return ENXIO;
	}

	error = pci_setup_msix(sc->dev);
	if (error) {
		device_printf(sc->dev, "could not setup MSI-X\n");
		goto back;
	}
	setup = TRUE;

	for (i = 0; i < sc->num_slices; ++i) {
		ss = &sc->ss[i];

		error = pci_alloc_msix_vector(sc->dev, i, &ss->intr_rid,
		    ss->intr_cpuid);
		if (error) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d on cpu%d\n", i, ss->intr_cpuid);
			goto back;
		}

		ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
		    &ss->intr_rid, RF_ACTIVE);
		if (ss->intr_res == NULL) {
			device_printf(sc->dev, "could not alloc "
			    "MSI-X %d resource\n", i);
			error = ENXIO;
			goto back;
		}
	}

	pci_enable_msix(sc->dev);
	sc->intr_type = PCI_INTR_TYPE_MSIX;
back:
	if (error)
		mxge_free_msix(sc, setup);
	return error;
}

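/*
 * Interrupt allocation entry point: multi-slice configurations
 * require MSI-X, while a single slice falls back to one MSI or
 * legacy INTx vector via pci_alloc_1intr().
 */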
static int
mxge_alloc_intr(struct mxge_softc *sc)
{
	struct mxge_slice_state *ss;
	u_int irq_flags;

	if (sc->num_slices > 1) {
		int error;

		error = mxge_alloc_msix(sc);
		if (error)
			return error;
		KKASSERT(sc->intr_type == PCI_INTR_TYPE_MSIX);
		return 0;
	}

	ss = &sc->ss[0];

	sc->intr_type = pci_alloc_1intr(sc->dev, mxge_msi_enable,
	    &ss->intr_rid, &irq_flags);

	ss->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
	    &ss->intr_rid, irq_flags);
	if (ss->intr_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}

	if (sc->intr_type == PCI_INTR_TYPE_LEGACY)
		ss->intr_func = mxge_legacy;
	else
		ss->intr_func = mxge_msi;
	ss->intr_serialize = &sc->main_serialize;
	ss->intr_cpuid = rman_get_cpuid(ss->intr_res);

	return 0;
}

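/*
 * Hook up the handler chosen by mxge_alloc_intr()/mxge_alloc_msix()
 * for every slice.  On failure, only the first 'i' vectors have been
 * set up, so exactly those are torn down again.
 */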
static int
mxge_setup_intr(struct mxge_softc *sc)
{
	int i;

	for (i = 0; i < sc->num_slices; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];
		int error;

		error = bus_setup_intr_descr(sc->dev, ss->intr_res,
		    INTR_MPSAFE, ss->intr_func, ss, &ss->intr_hand,
		    ss->intr_serialize, ss->intr_desc);
		if (error) {
			device_printf(sc->dev, "can't setup %dth intr\n", i);
			mxge_teardown_intr(sc, i);
			return error;
		}
	}
	return 0;
}

static void
mxge_teardown_intr(struct mxge_softc *sc, int cnt)
{
	int i;

	if (sc->ss == NULL)
		return;

	for (i = 0; i < cnt; ++i) {
		struct mxge_slice_state *ss = &sc->ss[i];

		bus_teardown_intr(sc->dev, ss->intr_res, ss->intr_hand);
	}
}

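/*
 * Release the interrupt resources allocated by mxge_alloc_intr():
 * the single MSI/legacy vector in the one-slice case, or the whole
 * MSI-X set otherwise.
 */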
static void
mxge_free_intr(struct mxge_softc *sc)
{
	if (sc->ss == NULL)
		return;

	if (sc->intr_type != PCI_INTR_TYPE_MSIX) {
		struct mxge_slice_state *ss = &sc->ss[0];

		if (ss->intr_res != NULL) {
			bus_release_resource(sc->dev, SYS_RES_IRQ,
			    ss->intr_rid, ss->intr_res);
		}
		if (sc->intr_type == PCI_INTR_TYPE_MSI)
			pci_release_msi(sc->dev);
	} else {
		mxge_free_msix(sc, TRUE);
	}
}