get mxge to build, stage 1/many
sys/dev/netif/mxge/if_mxge.c
/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
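
/*
 * Note (editorial) on the firmware images above: "eth" vs "ethp"
 * selects the aligned- vs unaligned-PCIe-completion build (see the
 * comment above mxge_firmware_probe() below), and the "rss" variants
 * are the multi-slice builds used when more than one slice is
 * enabled elsewhere in the driver.
 */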
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}
/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;
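
	/*
	 * Note: a 4KB-aligned allocation larger than 4KB necessarily
	 * crosses 4KB boundaries, so the boundary restriction is
	 * dropped for that case below; all other allocations are kept
	 * within a single 4KB boundary.
	 */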
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
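/*
 * Note on the parse loop below: after matching "MAC=", ptr is
 * advanced by 1 and then by 3 on each pass of the loop, so each
 * strtoul() starts exactly at an octet ("MAC=" is 4 bytes and each
 * "xx:" group is 3).  MXGE_NEXT_STRING() skips to the byte following
 * the NUL that terminates the current string.
 */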
static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;


	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
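	/*
	 * Worked example (editorial): bandwidth in MB/s is
	 * (transfers * len bytes) / (ticks * 0.5us), which is what
	 * ((cmd.data0 >> 16) * len * 2) / (cmd.data0 & 0xffff)
	 * computes below.  E.g. 512 transfers of 4096 bytes in
	 * 2048 ticks (1024us) gives 512 * 4096 * 2 / 2048 = 2048 MB/s.
	 */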
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}
	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;


	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}
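
/*
 * Note (editorial): the union below exists only to strip a const
 * qualifier without a cast warning; store through ro_char, then read
 * the writable pointer back via rw_char.
 */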
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = malloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	free(ptr, M_TEMP);
}


static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}



	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */


	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
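
/*
 * Note on the command protocol implemented by mxge_send_cmd() below:
 * the driver PIOs an 8-byte-aligned mcp_cmd_t (command word, three
 * data words, and the DMA address of the response block) into the
 * firmware command window in SRAM, then polls response->result,
 * which the firmware DMAs back into host memory; 0xffffffff means
 * "still busy", and any other value is the command's status code.
 */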
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8);	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;


	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);


	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}


	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}


	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_pause(sc, enabled);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, lro_cnt);
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "firmware_version",
		       CTLFLAG_RD, &sc->fw_version,
		       0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "serial_number",
		       CTLFLAG_RD, &sc->serial_number_string,
		       0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "product_code",
		       CTLFLAG_RD, &sc->product_code_string,
		       0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");


	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");


	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */
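/*
 * Note: copying backwards leaves slot 0 of the group for the caller;
 * mxge_submit_req() writes the first descriptor (and its valid flags)
 * last, so the firmware never sees a partially written chain.
 */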
static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;


	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion. We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
1960 static void
1961 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
1963 mxge_softc_t *sc;
1964 mcp_kreq_ether_send_t *req;
1965 bus_dma_segment_t *seg;
1966 struct mbuf *m_tmp;
1967 struct ifnet *ifp;
1968 mxge_tx_ring_t *tx;
1969 struct ip *ip;
1970 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
1971 uint16_t pseudo_hdr_offset;
1972 uint8_t flags, cksum_offset;
1975 sc = ss->sc;
1976 ifp = sc->ifp;
1977 tx = &ss->tx;
1979 ip_off = sizeof (struct ether_header);
1980 #ifdef MXGE_NEW_VLAN_API
1981 if (m->m_flags & M_VLANTAG) {
1982 m = mxge_vlan_tag_insert(m);
1983 if (__predict_false(m == NULL))
1984 goto drop;
1985 ip_off += ETHER_VLAN_ENCAP_LEN;
1987 #endif
1988 /* (try to) map the frame for DMA */
1989 idx = tx->req & tx->mask;
1990 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
1991 m, tx->seg_list, &cnt,
1992 BUS_DMA_NOWAIT);
1993 if (__predict_false(err == EFBIG)) {
1994 /* Too many segments in the chain. Try
1995 to defrag */
1996 m_tmp = m_defrag(m, M_NOWAIT);
1997 if (m_tmp == NULL) {
1998 goto drop;
2000 ss->tx.defrag++;
2001 m = m_tmp;
2002 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2003 tx->info[idx].map,
2004 m, tx->seg_list, &cnt,
2005 BUS_DMA_NOWAIT);
2007 if (__predict_false(err != 0)) {
2008 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2009 " packet len = %d\n", err, m->m_pkthdr.len);
2010 goto drop;
2012 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2013 BUS_DMASYNC_PREWRITE);
2014 tx->info[idx].m = m;
2016 #if IFCAP_TSO4
2017 /* TSO is different enough, we handle it in another routine */
2018 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2019 mxge_encap_tso(ss, m, cnt, ip_off);
2020 return;
2022 #endif
2024 req = tx->req_list;
2025 cksum_offset = 0;
2026 pseudo_hdr_offset = 0;
2027 flags = MXGEFW_FLAGS_NO_TSO;
2029 /* checksum offloading? */
2030 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2031 /* ensure ip header is in first mbuf, copy
2032 it to a scratch buffer if not */
2033 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2034 m_copydata(m, 0, ip_off + sizeof (*ip),
2035 ss->scratch);
2036 ip = (struct ip *)(ss->scratch + ip_off);
2037 } else {
2038 ip = (struct ip *)(mtod(m, char *) + ip_off);
2040 cksum_offset = ip_off + (ip->ip_hl << 2);
2041 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2042 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2043 req->cksum_offset = cksum_offset;
2044 flags |= MXGEFW_FLAGS_CKSUM;
2045 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2046 } else {
2047 odd_flag = 0;
2049 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2050 flags |= MXGEFW_FLAGS_SMALL;
2052 /* convert segments into a request list */
2053 cum_len = 0;
2054 seg = tx->seg_list;
2055 req->flags = MXGEFW_FLAGS_FIRST;
2056 for (i = 0; i < cnt; i++) {
2057 req->addr_low =
2058 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2059 req->addr_high =
2060 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2061 req->length = htobe16(seg->ds_len);
2062 req->cksum_offset = cksum_offset;
2063 if (cksum_offset > seg->ds_len)
2064 cksum_offset -= seg->ds_len;
2065 else
2066 cksum_offset = 0;
2067 req->pseudo_hdr_offset = pseudo_hdr_offset;
2068 req->pad = 0; /* complete solid 16-byte block */
2069 req->rdma_count = 1;
2070 req->flags |= flags | ((cum_len & 1) * odd_flag);
2071 cum_len += seg->ds_len;
2072 seg++;
2073 req++;
2074 req->flags = 0;
2076 req--;
2077 /* pad runts to 60 bytes */
2078 if (cum_len < 60) {
2079 req++;
2080 req->addr_low =
2081 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2082 req->addr_high =
2083 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2084 req->length = htobe16(60 - cum_len);
2085 req->cksum_offset = 0;
2086 req->pseudo_hdr_offset = pseudo_hdr_offset;
2087 req->pad = 0; /* complete solid 16-byte block */
2088 req->rdma_count = 1;
2089 req->flags |= flags | ((cum_len & 1) * odd_flag);
2090 cnt++;
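/* the first descriptor's rdma_count is overwritten below with the
 * total descriptor count for the whole request */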
2093 tx->req_list[0].rdma_count = cnt;
2094 #if 0
2095 /* print what the firmware will see */
2096 for (i = 0; i < cnt; i++) {
2097 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2098 "cso:%d, flags:0x%x, rdma:%d\n",
2099 i, (int)ntohl(tx->req_list[i].addr_high),
2100 (int)ntohl(tx->req_list[i].addr_low),
2101 (int)ntohs(tx->req_list[i].length),
2102 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2103 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2104 tx->req_list[i].rdma_count);
2106 printf("--------------\n");
2107 #endif
2108 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2109 mxge_submit_req(tx, tx->req_list, cnt);
2110 #ifdef IFNET_BUF_RING
2111 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2112 /* tell the NIC to start polling this slice */
2113 *tx->send_go = 1;
2114 tx->queue_active = 1;
2115 tx->activate++;
2116 wmb();
2118 #endif
2119 return;
2121 drop:
2122 m_freem(m);
2123 ss->oerrors++;
2124 return;
2127 #ifdef IFNET_BUF_RING
2128 static void
2129 mxge_qflush(struct ifnet *ifp)
2131 mxge_softc_t *sc = ifp->if_softc;
2132 mxge_tx_ring_t *tx;
2133 struct mbuf *m;
2134 int slice;
2136 for (slice = 0; slice < sc->num_slices; slice++) {
2137 tx = &sc->ss[slice].tx;
2138 mtx_lock(&tx->mtx);
2139 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2140 m_freem(m);
2141 mtx_unlock(&tx->mtx);
2143 if_qflush(ifp);
2146 static inline void
2147 mxge_start_locked(struct mxge_slice_state *ss)
2149 mxge_softc_t *sc;
2150 struct mbuf *m;
2151 struct ifnet *ifp;
2152 mxge_tx_ring_t *tx;
2154 sc = ss->sc;
2155 ifp = sc->ifp;
2156 tx = &ss->tx;
2158 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2159 m = drbr_dequeue(ifp, tx->br);
2160 if (m == NULL) {
2161 return;
2163 /* let BPF see it */
2164 BPF_MTAP(ifp, m);
2166 /* give it to the nic */
2167 mxge_encap(ss, m);
2169 /* ran out of transmit slots */
2170 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2171 && (!drbr_empty(ifp, tx->br))) {
2172 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2173 tx->stall++;
2177 static int
2178 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2180 mxge_softc_t *sc;
2181 struct ifnet *ifp;
2182 mxge_tx_ring_t *tx;
2183 int err;
2185 sc = ss->sc;
2186 ifp = sc->ifp;
2187 tx = &ss->tx;
2189 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2190 IFF_DRV_RUNNING) {
2191 err = drbr_enqueue(ifp, tx->br, m);
2192 return (err);
2195 if (drbr_empty(ifp, tx->br) &&
2196 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2197 /* let BPF see it */
2198 BPF_MTAP(ifp, m);
2199 /* give it to the nic */
2200 mxge_encap(ss, m);
2201 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2202 return (err);
2204 if (!drbr_empty(ifp, tx->br))
2205 mxge_start_locked(ss);
2206 return (0);
2209 static int
2210 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2212 mxge_softc_t *sc = ifp->if_softc;
2213 struct mxge_slice_state *ss;
2214 mxge_tx_ring_t *tx;
2215 int err = 0;
2216 int slice;
2218 slice = m->m_pkthdr.flowid;
2219 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2221 ss = &sc->ss[slice];
2222 tx = &ss->tx;
2224 if (mtx_trylock(&tx->mtx)) {
2225 err = mxge_transmit_locked(ss, m);
2226 mtx_unlock(&tx->mtx);
2227 } else {
2228 err = drbr_enqueue(ifp, tx->br, m);
2231 return (err);
2234 #else
2236 static inline void
2237 mxge_start_locked(struct mxge_slice_state *ss)
2239 mxge_softc_t *sc;
2240 struct mbuf *m;
2241 struct ifnet *ifp;
2242 mxge_tx_ring_t *tx;
2244 sc = ss->sc;
2245 ifp = sc->ifp;
2246 tx = &ss->tx;
2247 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2248 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2249 if (m == NULL) {
2250 return;
2252 /* let BPF see it */
2253 BPF_MTAP(ifp, m);
2255 /* give it to the nic */
2256 mxge_encap(ss, m);
2258 /* ran out of transmit slots */
2259 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2260 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2261 tx->stall++;
2264 #endif
2265 static void
2266 mxge_start(struct ifnet *ifp)
2268 mxge_softc_t *sc = ifp->if_softc;
2269 struct mxge_slice_state *ss;
2271 /* only use the first slice for now */
2272 ss = &sc->ss[0];
2273 mtx_lock(&ss->tx.mtx);
2274 mxge_start_locked(ss);
2275 mtx_unlock(&ss->tx.mtx);
2276 }
2278 /*
2279 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2280 * at most 32 bytes at a time, so as to avoid involving the software
2281 * pio handler in the nic. We re-write the first segment's low
2282 * DMA address to mark it valid only after we write the entire chunk
2283 * in a burst
2284 */
2285 static inline void
2286 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2287 mcp_kreq_ether_recv_t *src)
2289 uint32_t low;
2291 low = src->addr_low;
2292 src->addr_low = 0xffffffff;
2293 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2294 wmb();
2295 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2296 wmb();
2297 src->addr_low = low;
2298 dst->addr_low = low;
2299 wmb();
2302 static int
2303 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2305 bus_dma_segment_t seg;
2306 struct mbuf *m;
2307 mxge_rx_ring_t *rx = &ss->rx_small;
2308 int cnt, err;
2310 m = m_gethdr(M_DONTWAIT, MT_DATA);
2311 if (m == NULL) {
2312 rx->alloc_fail++;
2313 err = ENOBUFS;
2314 goto done;
2316 m->m_len = MHLEN;
2317 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2318 &seg, &cnt, BUS_DMA_NOWAIT);
2319 if (err != 0) {
2320 m_free(m);
2321 goto done;
2323 rx->info[idx].m = m;
2324 rx->shadow[idx].addr_low =
2325 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2326 rx->shadow[idx].addr_high =
2327 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2329 done:
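/* receive buffers are handed to the NIC in aligned batches of 8 to
 * amortize the cost of the PIO copy (see mxge_submit_8rx above) */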
2330 if ((idx & 7) == 7)
2331 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2332 return err;
2335 static int
2336 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2338 bus_dma_segment_t seg[3];
2339 struct mbuf *m;
2340 mxge_rx_ring_t *rx = &ss->rx_big;
2341 int cnt, err, i;
2343 if (rx->cl_size == MCLBYTES)
2344 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2345 else
2346 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2347 if (m == NULL) {
2348 rx->alloc_fail++;
2349 err = ENOBUFS;
2350 goto done;
2352 m->m_len = rx->mlen;
2353 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2354 seg, &cnt, BUS_DMA_NOWAIT);
2355 if (err != 0) {
2356 m_free(m);
2357 goto done;
2359 rx->info[idx].m = m;
2360 rx->shadow[idx].addr_low =
2361 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2362 rx->shadow[idx].addr_high =
2363 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2365 #if MXGE_VIRT_JUMBOS
2366 for (i = 1; i < cnt; i++) {
2367 rx->shadow[idx + i].addr_low =
2368 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2369 rx->shadow[idx + i].addr_high =
2370 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2372 #endif
2374 done:
2375 for (i = 0; i < rx->nbufs; i++) {
2376 if ((idx & 7) == 7) {
2377 mxge_submit_8rx(&rx->lanai[idx - 7],
2378 &rx->shadow[idx - 7]);
2380 idx++;
2382 return err;
2383 }
2385 /*
2386 * Myri10GE hardware checksums are not valid if the sender
2387 * padded the frame with non-zero padding. This is because
2388 * the firmware just does a simple 16-bit 1s complement
2389 * checksum across the entire frame, excluding the first 14
2390 * bytes. It is best to simply check the checksum and
2391 * tell the stack about it only if the checksum is good
2392 */
2394 static inline uint16_t
2395 mxge_rx_csum(struct mbuf *m, int csum)
2397 struct ether_header *eh;
2398 struct ip *ip;
2399 uint16_t c;
2401 eh = mtod(m, struct ether_header *);
2403 /* only deal with IPv4 TCP & UDP for now */
2404 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2405 return 1;
2406 ip = (struct ip *)(eh + 1);
2407 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2408 ip->ip_p != IPPROTO_UDP))
2409 return 1;
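/*
 * The firmware's csum is a raw ones-complement sum over everything
 * past the Ethernet header, so folding in the IPv4 pseudo-header
 * with in_pseudo() and complementing should yield 0 when the
 * packet's TCP/UDP checksum was correct; callers treat a zero
 * return as "checksum good".
 */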
2410 #ifdef INET
2411 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2412 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2413 (ip->ip_hl << 2) + ip->ip_p));
2414 #else
2415 c = 1;
2416 #endif
2417 c ^= 0xffff;
2418 return (c);
2421 static void
2422 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2424 struct ether_vlan_header *evl;
2425 struct ether_header *eh;
2426 uint32_t partial;
2428 evl = mtod(m, struct ether_vlan_header *);
2429 eh = mtod(m, struct ether_header *);
2431 /*
2432 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2433 * after what the firmware thought was the end of the ethernet
2434 * header.
2435 */
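/*
 * Ones-complement arithmetic below: adding ~partial subtracts the
 * skipped 32-bit word, the second add propagates the end-around
 * carry, and the two shift-and-mask steps fold the sum back down
 * to 16 bits.
 */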
2437 /* put checksum into host byte order */
2438 *csum = ntohs(*csum);
2439 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2440 (*csum) += ~partial;
2441 (*csum) += ((*csum) < ~partial);
2442 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2443 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2445 /* restore checksum to network byte order;
2446 later consumers expect this */
2447 *csum = htons(*csum);
2449 /* save the tag */
2450 #ifdef MXGE_NEW_VLAN_API
2451 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2452 #else
2454 struct m_tag *mtag;
2455 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2456 M_NOWAIT);
2457 if (mtag == NULL)
2458 return;
2459 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2460 m_tag_prepend(m, mtag);
2463 #endif
2464 m->m_flags |= M_VLANTAG;
2466 /*
2467 * Remove the 802.1q header by copying the Ethernet
2468 * addresses over it and adjusting the beginning of
2469 * the data in the mbuf. The encapsulated Ethernet
2470 * type field is already in place.
2471 */
2472 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2473 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2474 m_adj(m, ETHER_VLAN_ENCAP_LEN);
2478 static inline void
2479 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2481 mxge_softc_t *sc;
2482 struct ifnet *ifp;
2483 struct mbuf *m;
2484 struct ether_header *eh;
2485 mxge_rx_ring_t *rx;
2486 bus_dmamap_t old_map;
2487 int idx;
2488 uint16_t tcpudp_csum;
2490 sc = ss->sc;
2491 ifp = sc->ifp;
2492 rx = &ss->rx_big;
2493 idx = rx->cnt & rx->mask;
2494 rx->cnt += rx->nbufs;
2495 /* save a pointer to the received mbuf */
2496 m = rx->info[idx].m;
2497 /* try to replace the received mbuf */
2498 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2499 /* drop the frame -- the old mbuf is re-cycled */
2500 ifp->if_ierrors++;
2501 return;
2504 /* unmap the received buffer */
2505 old_map = rx->info[idx].map;
2506 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2507 bus_dmamap_unload(rx->dmat, old_map);
2509 /* swap the bus_dmamap_t's */
2510 rx->info[idx].map = rx->extra_map;
2511 rx->extra_map = old_map;
2513 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2514 * aligned */
2515 m->m_data += MXGEFW_PAD;
2517 m->m_pkthdr.rcvif = ifp;
2518 m->m_len = m->m_pkthdr.len = len;
2519 ss->ipackets++;
2520 eh = mtod(m, struct ether_header *);
2521 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2522 mxge_vlan_tag_remove(m, &csum);
2524 /* if the checksum is valid, mark it in the mbuf header */
2525 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2526 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2527 return;
2528 /* otherwise, it was a UDP frame, or a TCP frame which
2529 we could not do LRO on. Tell the stack that the
2530 checksum is good */
2531 m->m_pkthdr.csum_data = 0xffff;
2532 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2534 /* flowid only valid if RSS hashing is enabled */
2535 if (sc->num_slices > 1) {
2536 m->m_pkthdr.flowid = (ss - sc->ss);
2537 m->m_flags |= M_FLOWID;
2539 /* pass the frame up the stack */
2540 (*ifp->if_input)(ifp, m);
2543 static inline void
2544 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2546 mxge_softc_t *sc;
2547 struct ifnet *ifp;
2548 struct ether_header *eh;
2549 struct mbuf *m;
2550 mxge_rx_ring_t *rx;
2551 bus_dmamap_t old_map;
2552 int idx;
2553 uint16_t tcpudp_csum;
2555 sc = ss->sc;
2556 ifp = sc->ifp;
2557 rx = &ss->rx_small;
2558 idx = rx->cnt & rx->mask;
2559 rx->cnt++;
2560 /* save a pointer to the received mbuf */
2561 m = rx->info[idx].m;
2562 /* try to replace the received mbuf */
2563 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2564 /* drop the frame -- the old mbuf is re-cycled */
2565 ifp->if_ierrors++;
2566 return;
2569 /* unmap the received buffer */
2570 old_map = rx->info[idx].map;
2571 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2572 bus_dmamap_unload(rx->dmat, old_map);
2574 /* swap the bus_dmamap_t's */
2575 rx->info[idx].map = rx->extra_map;
2576 rx->extra_map = old_map;
2578 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2579 * aligned */
2580 m->m_data += MXGEFW_PAD;
2582 m->m_pkthdr.rcvif = ifp;
2583 m->m_len = m->m_pkthdr.len = len;
2584 ss->ipackets++;
2585 eh = mtod(m, struct ether_header *);
2586 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2587 mxge_vlan_tag_remove(m, &csum);
2589 /* if the checksum is valid, mark it in the mbuf header */
2590 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2591 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2592 return;
2593 /* otherwise, it was a UDP frame, or a TCP frame which
2594 we could not do LRO on. Tell the stack that the
2595 checksum is good */
2596 m->m_pkthdr.csum_data = 0xffff;
2597 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2599 /* flowid only valid if RSS hashing is enabled */
2600 if (sc->num_slices > 1) {
2601 m->m_pkthdr.flowid = (ss - sc->ss);
2602 m->m_flags |= M_FLOWID;
2604 /* pass the frame up the stack */
2605 (*ifp->if_input)(ifp, m);
2608 static inline void
2609 mxge_clean_rx_done(struct mxge_slice_state *ss)
2611 mxge_rx_done_t *rx_done = &ss->rx_done;
2612 int limit = 0;
2613 uint16_t length;
2614 uint16_t checksum;
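/* the firmware posts a non-zero length into an rx_done entry when a
 * receive completes; zeroing the entry below hands the slot back
 * for reuse */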
2617 while (rx_done->entry[rx_done->idx].length != 0) {
2618 length = ntohs(rx_done->entry[rx_done->idx].length);
2619 rx_done->entry[rx_done->idx].length = 0;
2620 checksum = rx_done->entry[rx_done->idx].checksum;
2621 if (length <= (MHLEN - MXGEFW_PAD))
2622 mxge_rx_done_small(ss, length, checksum);
2623 else
2624 mxge_rx_done_big(ss, length, checksum);
2625 rx_done->cnt++;
2626 rx_done->idx = rx_done->cnt & rx_done->mask;
2628 /* limit potential for livelock */
2629 if (__predict_false(++limit > rx_done->mask / 2))
2630 break;
2632 #ifdef INET
2633 while (!SLIST_EMPTY(&ss->lro_active)) {
2634 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2635 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2636 mxge_lro_flush(ss, lro);
2638 #endif
2642 static inline void
2643 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2645 struct ifnet *ifp;
2646 mxge_tx_ring_t *tx;
2647 struct mbuf *m;
2648 bus_dmamap_t map;
2649 int idx;
2650 int *flags;
2652 tx = &ss->tx;
2653 ifp = ss->sc->ifp;
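/* tx->done is a free-running completion counter; masking with
 * tx->mask turns it into an index into the power-of-two send ring */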
2654 while (tx->pkt_done != mcp_idx) {
2655 idx = tx->done & tx->mask;
2656 tx->done++;
2657 m = tx->info[idx].m;
2658 /* mbuf and DMA map only attached to the first
2659 segment per-mbuf */
2660 if (m != NULL) {
2661 ss->obytes += m->m_pkthdr.len;
2662 if (m->m_flags & M_MCAST)
2663 ss->omcasts++;
2664 ss->opackets++;
2665 tx->info[idx].m = NULL;
2666 map = tx->info[idx].map;
2667 bus_dmamap_unload(tx->dmat, map);
2668 m_freem(m);
2670 if (tx->info[idx].flag) {
2671 tx->info[idx].flag = 0;
2672 tx->pkt_done++;
2676 /* If we have space, clear IFF_OACTIVE to tell the stack that
2677 it's OK to send packets */
2678 #ifdef IFNET_BUF_RING
2679 flags = &ss->if_drv_flags;
2680 #else
2681 flags = &ifp->if_drv_flags;
2682 #endif
2683 mtx_lock(&ss->tx.mtx);
2684 if ((*flags) & IFF_DRV_OACTIVE &&
2685 tx->req - tx->done < (tx->mask + 1)/4) {
2686 *(flags) &= ~IFF_DRV_OACTIVE;
2687 ss->tx.wake++;
2688 mxge_start_locked(ss);
2690 #ifdef IFNET_BUF_RING
2691 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2692 /* let the NIC stop polling this queue, since there
2693 * are no more transmits pending */
2694 if (tx->req == tx->done) {
2695 *tx->send_stop = 1;
2696 tx->queue_active = 0;
2697 tx->deactivate++;
2698 wmb();
2701 #endif
2702 mtx_unlock(&ss->tx.mtx);
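/*
 * Tables mapping bits in a module's compliance byte to ifmedia
 * types; a zero in the ifmedia field marks a module type with no
 * corresponding media word, which is reported by name only.
 */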
2706 static struct mxge_media_type mxge_xfp_media_types[] =
2708 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2709 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2710 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2711 {0, (1 << 5), "10GBASE-ER"},
2712 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2713 {0, (1 << 3), "10GBASE-SW"},
2714 {0, (1 << 2), "10GBASE-LW"},
2715 {0, (1 << 1), "10GBASE-EW"},
2716 {0, (1 << 0), "Reserved"}
2718 static struct mxge_media_type mxge_sfp_media_types[] =
2720 {0, (1 << 7), "Reserved"},
2721 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2722 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2723 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2726 static void
2727 mxge_set_media(mxge_softc_t *sc, int type)
2729 sc->media_flags |= type;
2730 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2731 ifmedia_set(&sc->media, sc->media_flags);
2732 }
2735 /*
2736 * Determine the media type for a NIC. Some XFPs will identify
2737 * themselves only when their link is up, so this is initiated via a
2738 * link up interrupt. However, this can potentially take up to
2739 * several milliseconds, so it is run via the watchdog routine, rather
2740 * than in the interrupt handler itself. This need only be done
2741 * once, not each time the link is up.
2742 */
2743 static void
2744 mxge_media_probe(mxge_softc_t *sc)
2746 mxge_cmd_t cmd;
2747 char *cage_type;
2748 char *ptr;
2749 struct mxge_media_type *mxge_media_types = NULL;
2750 int i, err, ms, mxge_media_type_entries;
2751 uint32_t byte;
2753 sc->need_media_probe = 0;
2755 /* if we've already set a media type, we're done */
2756 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2757 return;
2759 /*
2760 * parse the product code to determine the interface type
2761 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2762 * after the 3rd dash in the driver's cached copy of the
2763 * EEPROM's product code string.
2764 */
2765 ptr = sc->product_code_string;
2766 if (ptr == NULL) {
2767 device_printf(sc->dev, "Missing product code\n");
2770 for (i = 0; i < 3; i++, ptr++) {
2771 ptr = index(ptr, '-');
2772 if (ptr == NULL) {
2773 device_printf(sc->dev,
2774 "only %d dashes in PC?!?\n", i);
2775 return;
2778 if (*ptr == 'C') {
2779 /* -C is CX4 */
2780 mxge_set_media(sc, IFM_10G_CX4);
2781 return;
2783 else if (*ptr == 'Q') {
2784 /* -Q is Quad Ribbon Fiber */
2785 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2786 /* FreeBSD has no media type for Quad ribbon fiber */
2787 return;
2790 if (*ptr == 'R') {
2791 /* -R is XFP */
2792 mxge_media_types = mxge_xfp_media_types;
2793 mxge_media_type_entries =
2794 sizeof (mxge_xfp_media_types) /
2795 sizeof (mxge_xfp_media_types[0]);
2796 byte = MXGE_XFP_COMPLIANCE_BYTE;
2797 cage_type = "XFP";
2800 if (*ptr == 'S' || *(ptr + 1) == 'S') {
2801 /* -S or -2S is SFP+ */
2802 mxge_media_types = mxge_sfp_media_types;
2803 mxge_media_type_entries =
2804 sizeof (mxge_sfp_media_types) /
2805 sizeof (mxge_sfp_media_types[0]);
2806 cage_type = "SFP+";
2807 byte = 3;
2810 if (mxge_media_types == NULL) {
2811 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2812 return;
2813 }
2815 /*
2816 * At this point we know the NIC has an XFP cage, so now we
2817 * try to determine what is in the cage by using the
2818 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2819 * register. We read just one byte, which may take over
2820 * a millisecond
2821 */
2823 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2824 cmd.data1 = byte;
2825 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2826 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2827 device_printf(sc->dev, "failed to read XFP\n");
2829 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2830 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2832 if (err != MXGEFW_CMD_OK) {
2833 return;
2836 /* now we wait for the data to be cached */
2837 cmd.data0 = byte;
2838 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2839 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2840 DELAY(1000);
2841 cmd.data0 = byte;
2842 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2844 if (err != MXGEFW_CMD_OK) {
2845 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2846 cage_type, err, ms);
2847 return;
2850 if (cmd.data0 == mxge_media_types[0].bitmask) {
2851 if (mxge_verbose)
2852 device_printf(sc->dev, "%s:%s\n", cage_type,
2853 mxge_media_types[0].name);
2854 mxge_set_media(sc, IFM_10G_CX4);
2855 return;
2857 for (i = 1; i < mxge_media_type_entries; i++) {
2858 if (cmd.data0 & mxge_media_types[i].bitmask) {
2859 if (mxge_verbose)
2860 device_printf(sc->dev, "%s:%s\n",
2861 cage_type,
2862 mxge_media_types[i].name);
2864 mxge_set_media(sc, mxge_media_types[i].flag);
2865 return;
2868 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2869 cmd.data0);
2871 return;
2874 static void
2875 mxge_intr(void *arg)
2877 struct mxge_slice_state *ss = arg;
2878 mxge_softc_t *sc = ss->sc;
2879 mcp_irq_data_t *stats = ss->fw_stats;
2880 mxge_tx_ring_t *tx = &ss->tx;
2881 mxge_rx_done_t *rx_done = &ss->rx_done;
2882 uint32_t send_done_count;
2883 uint8_t valid;
2886 #ifndef IFNET_BUF_RING
2887 /* an interrupt on a non-zero slice is implicitly valid
2888 since MSI-X irqs are not shared */
2889 if (ss != sc->ss) {
2890 mxge_clean_rx_done(ss);
2891 *ss->irq_claim = be32toh(3);
2892 return;
2894 #endif
2896 /* make sure the DMA has finished */
2897 if (!stats->valid) {
2898 return;
2900 valid = stats->valid;
2902 if (sc->legacy_irq) {
2903 /* lower legacy IRQ */
2904 *sc->irq_deassert = 0;
2905 if (!mxge_deassert_wait)
2906 /* don't wait for conf. that irq is low */
2907 stats->valid = 0;
2908 } else {
2909 stats->valid = 0;
2912 /* loop while waiting for legacy irq deassertion */
2913 do {
2914 /* check for transmit completes and receives */
2915 send_done_count = be32toh(stats->send_done_count);
2916 while ((send_done_count != tx->pkt_done) ||
2917 (rx_done->entry[rx_done->idx].length != 0)) {
2918 if (send_done_count != tx->pkt_done)
2919 mxge_tx_done(ss, (int)send_done_count);
2920 mxge_clean_rx_done(ss);
2921 send_done_count = be32toh(stats->send_done_count);
2923 if (sc->legacy_irq && mxge_deassert_wait)
2924 wmb();
2925 } while (*((volatile uint8_t *) &stats->valid));
2927 /* fw link & error stats meaningful only on the first slice */
2928 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2929 if (sc->link_state != stats->link_up) {
2930 sc->link_state = stats->link_up;
2931 if (sc->link_state) {
2932 if_link_state_change(sc->ifp, LINK_STATE_UP);
2933 if (mxge_verbose)
2934 device_printf(sc->dev, "link up\n");
2935 } else {
2936 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2937 if (mxge_verbose)
2938 device_printf(sc->dev, "link down\n");
2940 sc->need_media_probe = 1;
2942 if (sc->rdma_tags_available !=
2943 be32toh(stats->rdma_tags_available)) {
2944 sc->rdma_tags_available =
2945 be32toh(stats->rdma_tags_available);
2946 device_printf(sc->dev, "RDMA timed out! %d tags "
2947 "left\n", sc->rdma_tags_available);
2950 if (stats->link_down) {
2951 sc->down_cnt += stats->link_down;
2952 sc->link_state = 0;
2953 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2957 /* check to see if we have rx token to pass back */
2958 if (valid & 0x1)
2959 *ss->irq_claim = be32toh(3);
2960 *(ss->irq_claim + 1) = be32toh(3);
2963 static void
2964 mxge_init(void *arg)
2970 static void
2971 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2973 struct lro_entry *lro_entry;
2974 int i;
2976 while (!SLIST_EMPTY(&ss->lro_free)) {
2977 lro_entry = SLIST_FIRST(&ss->lro_free);
2978 SLIST_REMOVE_HEAD(&ss->lro_free, next);
2979 free(lro_entry, M_DEVBUF);
2982 for (i = 0; i <= ss->rx_big.mask; i++) {
2983 if (ss->rx_big.info[i].m == NULL)
2984 continue;
2985 bus_dmamap_unload(ss->rx_big.dmat,
2986 ss->rx_big.info[i].map);
2987 m_freem(ss->rx_big.info[i].m);
2988 ss->rx_big.info[i].m = NULL;
2991 for (i = 0; i <= ss->rx_small.mask; i++) {
2992 if (ss->rx_small.info[i].m == NULL)
2993 continue;
2994 bus_dmamap_unload(ss->rx_small.dmat,
2995 ss->rx_small.info[i].map);
2996 m_freem(ss->rx_small.info[i].m);
2997 ss->rx_small.info[i].m = NULL;
3000 /* transmit ring used only on the first slice */
3001 if (ss->tx.info == NULL)
3002 return;
3004 for (i = 0; i <= ss->tx.mask; i++) {
3005 ss->tx.info[i].flag = 0;
3006 if (ss->tx.info[i].m == NULL)
3007 continue;
3008 bus_dmamap_unload(ss->tx.dmat,
3009 ss->tx.info[i].map);
3010 m_freem(ss->tx.info[i].m);
3011 ss->tx.info[i].m = NULL;
3015 static void
3016 mxge_free_mbufs(mxge_softc_t *sc)
3018 int slice;
3020 for (slice = 0; slice < sc->num_slices; slice++)
3021 mxge_free_slice_mbufs(&sc->ss[slice]);
3024 static void
3025 mxge_free_slice_rings(struct mxge_slice_state *ss)
3027 int i;
3030 if (ss->rx_done.entry != NULL)
3031 mxge_dma_free(&ss->rx_done.dma);
3032 ss->rx_done.entry = NULL;
3034 if (ss->tx.req_bytes != NULL)
3035 free(ss->tx.req_bytes, M_DEVBUF);
3036 ss->tx.req_bytes = NULL;
3038 if (ss->tx.seg_list != NULL)
3039 free(ss->tx.seg_list, M_DEVBUF);
3040 ss->tx.seg_list = NULL;
3042 if (ss->rx_small.shadow != NULL)
3043 free(ss->rx_small.shadow, M_DEVBUF);
3044 ss->rx_small.shadow = NULL;
3046 if (ss->rx_big.shadow != NULL)
3047 free(ss->rx_big.shadow, M_DEVBUF);
3048 ss->rx_big.shadow = NULL;
3050 if (ss->tx.info != NULL) {
3051 if (ss->tx.dmat != NULL) {
3052 for (i = 0; i <= ss->tx.mask; i++) {
3053 bus_dmamap_destroy(ss->tx.dmat,
3054 ss->tx.info[i].map);
3056 bus_dma_tag_destroy(ss->tx.dmat);
3058 free(ss->tx.info, M_DEVBUF);
3060 ss->tx.info = NULL;
3062 if (ss->rx_small.info != NULL) {
3063 if (ss->rx_small.dmat != NULL) {
3064 for (i = 0; i <= ss->rx_small.mask; i++) {
3065 bus_dmamap_destroy(ss->rx_small.dmat,
3066 ss->rx_small.info[i].map);
3068 bus_dmamap_destroy(ss->rx_small.dmat,
3069 ss->rx_small.extra_map);
3070 bus_dma_tag_destroy(ss->rx_small.dmat);
3072 free(ss->rx_small.info, M_DEVBUF);
3074 ss->rx_small.info = NULL;
3076 if (ss->rx_big.info != NULL) {
3077 if (ss->rx_big.dmat != NULL) {
3078 for (i = 0; i <= ss->rx_big.mask; i++) {
3079 bus_dmamap_destroy(ss->rx_big.dmat,
3080 ss->rx_big.info[i].map);
3082 bus_dmamap_destroy(ss->rx_big.dmat,
3083 ss->rx_big.extra_map);
3084 bus_dma_tag_destroy(ss->rx_big.dmat);
3086 free(ss->rx_big.info, M_DEVBUF);
3088 ss->rx_big.info = NULL;
3091 static void
3092 mxge_free_rings(mxge_softc_t *sc)
3094 int slice;
3096 for (slice = 0; slice < sc->num_slices; slice++)
3097 mxge_free_slice_rings(&sc->ss[slice]);
3100 static int
3101 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3102 int tx_ring_entries)
3104 mxge_softc_t *sc = ss->sc;
3105 size_t bytes;
3106 int err, i;
3108 err = ENOMEM;
3110 /* allocate per-slice receive resources */
3112 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3113 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3115 /* allocate the rx shadow rings */
3116 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3117 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3118 if (ss->rx_small.shadow == NULL)
3119 return err;
3121 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3122 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3123 if (ss->rx_big.shadow == NULL)
3124 return err;
3126 /* allocate the rx host info rings */
3127 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3128 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3129 if (ss->rx_small.info == NULL)
3130 return err;
3132 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3133 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3134 if (ss->rx_big.info == NULL)
3135 return err;
3137 /* allocate the rx busdma resources */
3138 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3139 1, /* alignment */
3140 4096, /* boundary */
3141 BUS_SPACE_MAXADDR, /* low */
3142 BUS_SPACE_MAXADDR, /* high */
3143 NULL, NULL, /* filter */
3144 MHLEN, /* maxsize */
3145 1, /* num segs */
3146 MHLEN, /* maxsegsize */
3147 BUS_DMA_ALLOCNOW, /* flags */
3148 NULL, NULL, /* lock */
3149 &ss->rx_small.dmat); /* tag */
3150 if (err != 0) {
3151 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3152 err);
3153 return err;
3156 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3157 1, /* alignment */
3158 #if MXGE_VIRT_JUMBOS
3159 4096, /* boundary */
3160 #else
3161 0, /* boundary */
3162 #endif
3163 BUS_SPACE_MAXADDR, /* low */
3164 BUS_SPACE_MAXADDR, /* high */
3165 NULL, NULL, /* filter */
3166 3*4096, /* maxsize */
3167 #if MXGE_VIRT_JUMBOS
3168 3, /* num segs */
3169 4096, /* maxsegsize*/
3170 #else
3171 1, /* num segs */
3172 MJUM9BYTES, /* maxsegsize*/
3173 #endif
3174 BUS_DMA_ALLOCNOW, /* flags */
3175 NULL, NULL, /* lock */
3176 &ss->rx_big.dmat); /* tag */
3177 if (err != 0) {
3178 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3179 err);
3180 return err;
3182 for (i = 0; i <= ss->rx_small.mask; i++) {
3183 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3184 &ss->rx_small.info[i].map);
3185 if (err != 0) {
3186 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3187 err);
3188 return err;
3191 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3192 &ss->rx_small.extra_map);
3193 if (err != 0) {
3194 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3195 err);
3196 return err;
3199 for (i = 0; i <= ss->rx_big.mask; i++) {
3200 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3201 &ss->rx_big.info[i].map);
3202 if (err != 0) {
3203 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3204 err);
3205 return err;
3208 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3209 &ss->rx_big.extra_map);
3210 if (err != 0) {
3211 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3212 err);
3213 return err;
3216 /* now allocate TX resources */
3218 #ifndef IFNET_BUF_RING
3219 /* only use a single TX ring for now */
3220 if (ss != ss->sc->ss)
3221 return 0;
3222 #endif
3224 ss->tx.mask = tx_ring_entries - 1;
3225 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3228 /* allocate the tx request copy block */
3229 bytes = 8 +
3230 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3231 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
3232 if (ss->tx.req_bytes == NULL)
3233 return err;
3234 /* ensure req_list entries are aligned to 8 bytes */
3235 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3236 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3238 /* allocate the tx busdma segment list */
3239 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3240 ss->tx.seg_list = (bus_dma_segment_t *)
3241 malloc(bytes, M_DEVBUF, M_WAITOK);
3242 if (ss->tx.seg_list == NULL)
3243 return err;
3245 /* allocate the tx host info ring */
3246 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3247 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3248 if (ss->tx.info == NULL)
3249 return err;
3251 /* allocate the tx busdma resources */
3252 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3253 1, /* alignment */
3254 sc->tx_boundary, /* boundary */
3255 BUS_SPACE_MAXADDR, /* low */
3256 BUS_SPACE_MAXADDR, /* high */
3257 NULL, NULL, /* filter */
3258 65536 + 256, /* maxsize */
3259 ss->tx.max_desc - 2, /* num segs */
3260 sc->tx_boundary, /* maxsegsz */
3261 BUS_DMA_ALLOCNOW, /* flags */
3262 NULL, NULL, /* lock */
3263 &ss->tx.dmat); /* tag */
3265 if (err != 0) {
3266 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3267 err);
3268 return err;
3271 /* now use these tags to setup dmamaps for each slot
3272 in the ring */
3273 for (i = 0; i <= ss->tx.mask; i++) {
3274 err = bus_dmamap_create(ss->tx.dmat, 0,
3275 &ss->tx.info[i].map);
3276 if (err != 0) {
3277 device_printf(sc->dev, "Err %d tx dmamap\n",
3278 err);
3279 return err;
3282 return 0;
3286 static int
3287 mxge_alloc_rings(mxge_softc_t *sc)
3289 mxge_cmd_t cmd;
3290 int tx_ring_size;
3291 int tx_ring_entries, rx_ring_entries;
3292 int err, slice;
3294 /* get ring sizes */
3295 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3296 tx_ring_size = cmd.data0;
3297 if (err != 0) {
3298 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3299 goto abort;
3302 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3303 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3304 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3305 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3306 IFQ_SET_READY(&sc->ifp->if_snd);
3308 for (slice = 0; slice < sc->num_slices; slice++) {
3309 err = mxge_alloc_slice_rings(&sc->ss[slice],
3310 rx_ring_entries,
3311 tx_ring_entries);
3312 if (err != 0)
3313 goto abort;
3315 return 0;
3317 abort:
3318 mxge_free_rings(sc);
3319 return err;
3324 static void
3325 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3327 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
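/* worked example: a 9000-byte MTU needs 9000 + 14 + 4 + 2 = 9020
 * bytes, too large for a page-sized cluster, so 9KB clusters (or,
 * with MXGE_VIRT_JUMBOS, several 4KB buffers) are chosen below */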
3329 if (bufsize < MCLBYTES) {
3330 /* easy, everything fits in a single buffer */
3331 *big_buf_size = MCLBYTES;
3332 *cl_size = MCLBYTES;
3333 *nbufs = 1;
3334 return;
3337 if (bufsize < MJUMPAGESIZE) {
3338 /* still easy, everything still fits in a single buffer */
3339 *big_buf_size = MJUMPAGESIZE;
3340 *cl_size = MJUMPAGESIZE;
3341 *nbufs = 1;
3342 return;
3344 #if MXGE_VIRT_JUMBOS
3345 /* now we need to use virtually contiguous buffers */
3346 *cl_size = MJUM9BYTES;
3347 *big_buf_size = 4096;
3348 *nbufs = mtu / 4096 + 1;
3349 /* needs to be a power of two, so round up */
3350 if (*nbufs == 3)
3351 *nbufs = 4;
3352 #else
3353 *cl_size = MJUM9BYTES;
3354 *big_buf_size = MJUM9BYTES;
3355 *nbufs = 1;
3356 #endif
3359 static int
3360 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3362 mxge_softc_t *sc;
3363 mxge_cmd_t cmd;
3364 bus_dmamap_t map;
3365 struct lro_entry *lro_entry;
3366 int err, i, slice;
3369 sc = ss->sc;
3370 slice = ss - sc->ss;
3372 SLIST_INIT(&ss->lro_free);
3373 SLIST_INIT(&ss->lro_active);
3375 for (i = 0; i < sc->lro_cnt; i++) {
3376 lro_entry = (struct lro_entry *)
3377 malloc(sizeof (*lro_entry), M_DEVBUF,
3378 M_NOWAIT | M_ZERO);
3379 if (lro_entry == NULL) {
3380 sc->lro_cnt = i;
3381 break;
3383 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3385 /* get the lanai pointers to the send and receive rings */
3387 err = 0;
3388 #ifndef IFNET_BUF_RING
3389 /* We currently only send from the first slice */
3390 if (slice == 0) {
3391 #endif
3392 cmd.data0 = slice;
3393 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3394 ss->tx.lanai =
3395 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3396 ss->tx.send_go = (volatile uint32_t *)
3397 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3398 ss->tx.send_stop = (volatile uint32_t *)
3399 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3400 #ifndef IFNET_BUF_RING
3402 #endif
3403 cmd.data0 = slice;
3404 err |= mxge_send_cmd(sc,
3405 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3406 ss->rx_small.lanai =
3407 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3408 cmd.data0 = slice;
3409 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3410 ss->rx_big.lanai =
3411 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3413 if (err != 0) {
3414 device_printf(sc->dev,
3415 "failed to get ring sizes or locations\n");
3416 return EIO;
3419 /* stock receive rings */
3420 for (i = 0; i <= ss->rx_small.mask; i++) {
3421 map = ss->rx_small.info[i].map;
3422 err = mxge_get_buf_small(ss, map, i);
3423 if (err) {
3424 device_printf(sc->dev, "alloced %d/%d smalls\n",
3425 i, ss->rx_small.mask + 1);
3426 return ENOMEM;
3429 for (i = 0; i <= ss->rx_big.mask; i++) {
3430 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3431 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3433 ss->rx_big.nbufs = nbufs;
3434 ss->rx_big.cl_size = cl_size;
3435 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3436 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3437 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3438 map = ss->rx_big.info[i].map;
3439 err = mxge_get_buf_big(ss, map, i);
3440 if (err) {
3441 device_printf(sc->dev, "alloced %d/%d bigs\n",
3442 i, ss->rx_big.mask + 1);
3443 return ENOMEM;
3446 return 0;
3449 static int
3450 mxge_open(mxge_softc_t *sc)
3452 mxge_cmd_t cmd;
3453 int err, big_bytes, nbufs, slice, cl_size, i;
3454 bus_addr_t bus;
3455 volatile uint8_t *itable;
3456 struct mxge_slice_state *ss;
3458 /* Copy the MAC address in case it was overridden */
3459 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3461 err = mxge_reset(sc, 1);
3462 if (err != 0) {
3463 device_printf(sc->dev, "failed to reset\n");
3464 return EIO;
3467 if (sc->num_slices > 1) {
3468 /* setup the indirection table */
3469 cmd.data0 = sc->num_slices;
3470 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3471 &cmd);
3473 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3474 &cmd);
3475 if (err != 0) {
3476 device_printf(sc->dev,
3477 "failed to setup rss tables\n");
3478 return err;
3481 /* just enable an identity mapping */
3482 itable = sc->sram + cmd.data0;
3483 for (i = 0; i < sc->num_slices; i++)
3484 itable[i] = (uint8_t)i;
3486 cmd.data0 = 1;
3487 cmd.data1 = mxge_rss_hash_type;
3488 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3489 if (err != 0) {
3490 device_printf(sc->dev, "failed to enable slices\n");
3491 return err;
3496 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3498 cmd.data0 = nbufs;
3499 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3500 &cmd);
3501 /* error is only meaningful if we're trying to set
3502 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3503 if (err && nbufs > 1) {
3504 device_printf(sc->dev,
3505 "Failed to set alway-use-n to %d\n",
3506 nbufs);
3507 return EIO;
3509 /* Give the firmware the mtu and the big and small buffer
3510 sizes. The firmware wants the big buf size to be a power
3511 of two. Luckily, FreeBSD's clusters are powers of two */
3512 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3513 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3514 cmd.data0 = MHLEN - MXGEFW_PAD;
3515 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3516 &cmd);
3517 cmd.data0 = big_bytes;
3518 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3520 if (err != 0) {
3521 device_printf(sc->dev, "failed to setup params\n");
3522 goto abort;
3525 /* Now give the firmware the pointer to the stats block */
3526 for (slice = 0;
3527 #ifdef IFNET_BUF_RING
3528 slice < sc->num_slices;
3529 #else
3530 slice < 1;
3531 #endif
3532 slice++) {
3533 ss = &sc->ss[slice];
3534 cmd.data0 =
3535 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3536 cmd.data1 =
3537 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3538 cmd.data2 = sizeof(struct mcp_irq_data);
3539 cmd.data2 |= (slice << 16);
3540 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3543 if (err != 0) {
3544 bus = sc->ss->fw_stats_dma.bus_addr;
3545 bus += offsetof(struct mcp_irq_data, send_done_count);
3546 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3547 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3548 err = mxge_send_cmd(sc,
3549 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3550 &cmd);
3551 /* Firmware cannot support multicast without STATS_DMA_V2 */
3552 sc->fw_multicast_support = 0;
3553 } else {
3554 sc->fw_multicast_support = 1;
3557 if (err != 0) {
3558 device_printf(sc->dev, "failed to setup params\n");
3559 goto abort;
3562 for (slice = 0; slice < sc->num_slices; slice++) {
3563 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3564 if (err != 0) {
3565 device_printf(sc->dev, "couldn't open slice %d\n",
3566 slice);
3567 goto abort;
3571 /* Finally, start the firmware running */
3572 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3573 if (err) {
3574 device_printf(sc->dev, "Couldn't bring up link\n");
3575 goto abort;
3577 #ifdef IFNET_BUF_RING
3578 for (slice = 0; slice < sc->num_slices; slice++) {
3579 ss = &sc->ss[slice];
3580 ss->if_drv_flags |= IFF_DRV_RUNNING;
3581 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3583 #endif
3584 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3585 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3586 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3588 return 0;
3591 abort:
3592 mxge_free_mbufs(sc);
3594 return err;
3597 static int
3598 mxge_close(mxge_softc_t *sc)
3600 mxge_cmd_t cmd;
3601 int err, old_down_cnt;
3602 #ifdef IFNET_BUF_RING
3603 struct mxge_slice_state *ss;
3604 int slice;
3605 #endif
3607 callout_stop(&sc->co_hdl);
3608 #ifdef IFNET_BUF_RING
3609 for (slice = 0; slice < sc->num_slices; slice++) {
3610 ss = &sc->ss[slice];
3611 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3613 #endif
3614 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3615 old_down_cnt = sc->down_cnt;
3616 wmb();
3617 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3618 if (err) {
3619 device_printf(sc->dev, "Couldn't bring down link\n");
3621 if (old_down_cnt == sc->down_cnt) {
3622 /* wait for down irq */
3623 DELAY(10 * sc->intr_coal_delay);
3625 wmb();
3626 if (old_down_cnt == sc->down_cnt) {
3627 device_printf(sc->dev, "never got down irq\n");
3630 mxge_free_mbufs(sc);
3632 return 0;
3635 static void
3636 mxge_setup_cfg_space(mxge_softc_t *sc)
3638 device_t dev = sc->dev;
3639 int reg;
3640 uint16_t cmd, lnk, pectl;
3642 /* find the PCIe link width and set max read request to 4KB */
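/* (offset 0x12 in the PCIe capability is the link status register,
 * whose bits 4:9 hold the negotiated width; offset 0x8 is device
 * control, where the value 5 in bits 12:14 selects a 4096-byte
 * max read request) */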
3643 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3644 lnk = pci_read_config(dev, reg + 0x12, 2);
3645 sc->link_width = (lnk >> 4) & 0x3f;
3647 pectl = pci_read_config(dev, reg + 0x8, 2);
3648 pectl = (pectl & ~0x7000) | (5 << 12);
3649 pci_write_config(dev, reg + 0x8, pectl, 2);
3652 /* Enable DMA and Memory space access */
3653 pci_enable_busmaster(dev);
3654 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3655 cmd |= PCIM_CMD_MEMEN;
3656 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3659 static uint32_t
3660 mxge_read_reboot(mxge_softc_t *sc)
3662 device_t dev = sc->dev;
3663 uint32_t vs;
3665 /* find the vendor specific offset */
3666 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3667 device_printf(sc->dev,
3668 "could not find vendor specific offset\n");
3669 return (uint32_t)-1;
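/* the vendor specific capability appears to act as a register
 * window: offset 0x10 selects the access mode, 0x18 takes the
 * target address (the reboot status register at 0xfffffff0), and
 * 0x14 returns the data */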
3671 /* enable read32 mode */
3672 pci_write_config(dev, vs + 0x10, 0x3, 1);
3673 /* tell NIC which register to read */
3674 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3675 return (pci_read_config(dev, vs + 0x14, 4));
3678 static int
3679 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3681 struct pci_devinfo *dinfo;
3682 mxge_tx_ring_t *tx;
3683 int err;
3684 uint32_t reboot;
3685 uint16_t cmd;
3687 err = ENXIO;
3689 device_printf(sc->dev, "Watchdog reset!\n");
3691 /*
3692 * check to see if the NIC rebooted. If it did, then all of
3693 * PCI config space has been reset, and things like the
3694 * busmaster bit will be zero. If this is the case, then we
3695 * must restore PCI config space before the NIC can be used
3696 * again
3697 */
3698 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3699 if (cmd == 0xffff) {
3700 /*
3701 * maybe the watchdog caught the NIC rebooting; wait
3702 * up to 100ms for it to finish. If it does not come
3703 * back, then give up
3704 */
3705 DELAY(1000*100);
3706 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3707 if (cmd == 0xffff) {
3708 device_printf(sc->dev, "NIC disappeared!\n");
3709 return (err);
3712 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3713 /* print the reboot status */
3714 reboot = mxge_read_reboot(sc);
3715 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3716 reboot);
3717 /* restore PCI configuration space */
3718 dinfo = device_get_ivars(sc->dev);
3719 pci_cfg_restore(sc->dev, dinfo);
3721 /* and redo any changes we made to our config space */
3722 mxge_setup_cfg_space(sc);
3724 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
3725 mxge_close(sc);
3726 err = mxge_open(sc);
3728 } else {
3729 tx = &sc->ss[slice].tx;
3730 device_printf(sc->dev,
3731 "NIC did not reboot, slice %d ring state:\n",
3732 slice);
3733 device_printf(sc->dev,
3734 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3735 tx->req, tx->done, tx->queue_active);
3736 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3737 tx->activate, tx->deactivate);
3738 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3739 tx->pkt_done,
3740 be32toh(sc->ss->fw_stats->send_done_count));
3741 device_printf(sc->dev, "not resetting\n");
3743 return (err);
3746 static int
3747 mxge_watchdog(mxge_softc_t *sc)
3749 mxge_tx_ring_t *tx;
3750 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3751 int i, err = 0;
3753 /* see if we have outstanding transmits, which
3754 have been pending for more than mxge_ticks */
3755 for (i = 0;
3756 #ifdef IFNET_BUF_RING
3757 (i < sc->num_slices) && (err == 0);
3758 #else
3759 (i < 1) && (err == 0);
3760 #endif
3761 i++) {
3762 tx = &sc->ss[i].tx;
3763 if (tx->req != tx->done &&
3764 tx->watchdog_req != tx->watchdog_done &&
3765 tx->done == tx->watchdog_done) {
3766 /* check for pause blocking before resetting */
3767 if (tx->watchdog_rx_pause == rx_pause)
3768 err = mxge_watchdog_reset(sc, i);
3769 else
3770 device_printf(sc->dev, "Flow control blocking "
3771 "xmits, check link partner\n");
3774 tx->watchdog_req = tx->req;
3775 tx->watchdog_done = tx->done;
3776 tx->watchdog_rx_pause = rx_pause;
3779 if (sc->need_media_probe)
3780 mxge_media_probe(sc);
3781 return (err);
3784 static void
3785 mxge_update_stats(mxge_softc_t *sc)
3787 struct mxge_slice_state *ss;
3788 u_long ipackets = 0;
3789 u_long opackets = 0;
3790 #ifdef IFNET_BUF_RING
3791 u_long obytes = 0;
3792 u_long omcasts = 0;
3793 u_long odrops = 0;
3794 #endif
3795 u_long oerrors = 0;
3796 int slice;
3798 for (slice = 0; slice < sc->num_slices; slice++) {
3799 ss = &sc->ss[slice];
3800 ipackets += ss->ipackets;
3801 opackets += ss->opackets;
3802 #ifdef IFNET_BUF_RING
3803 obytes += ss->obytes;
3804 omcasts += ss->omcasts;
3805 odrops += ss->tx.br->br_drops;
3806 #endif
3807 oerrors += ss->oerrors;
3809 sc->ifp->if_ipackets = ipackets;
3810 sc->ifp->if_opackets = opackets;
3811 #ifdef IFNET_BUF_RING
3812 sc->ifp->if_obytes = obytes;
3813 sc->ifp->if_omcasts = omcasts;
3814 sc->ifp->if_snd.ifq_drops = odrops;
3815 #endif
3816 sc->ifp->if_oerrors = oerrors;
3819 static void
3820 mxge_tick(void *arg)
3822 mxge_softc_t *sc = arg;
3823 int err = 0;
3825 /* aggregate stats from different slices */
3826 mxge_update_stats(sc);
3827 if (!sc->watchdog_countdown) {
3828 err = mxge_watchdog(sc);
3829 sc->watchdog_countdown = 4;
3831 sc->watchdog_countdown--;
3832 if (err == 0)
3833 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3837 static int
3838 mxge_media_change(struct ifnet *ifp)
3840 return EINVAL;
3843 static int
3844 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3846 struct ifnet *ifp = sc->ifp;
3847 int real_mtu, old_mtu;
3848 int err = 0;
3851 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3852 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3853 return EINVAL;
3854 mtx_lock(&sc->driver_mtx);
3855 old_mtu = ifp->if_mtu;
3856 ifp->if_mtu = mtu;
3857 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3858 mxge_close(sc);
3859 err = mxge_open(sc);
3860 if (err != 0) {
3861 ifp->if_mtu = old_mtu;
3862 mxge_close(sc);
3863 (void) mxge_open(sc);
3866 mtx_unlock(&sc->driver_mtx);
3867 return err;
3870 static void
3871 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3873 mxge_softc_t *sc = ifp->if_softc;
3876 if (sc == NULL)
3877 return;
3878 ifmr->ifm_status = IFM_AVALID;
3879 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3880 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3881 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3884 static int
3885 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
3887 mxge_softc_t *sc = ifp->if_softc;
3888 struct ifreq *ifr = (struct ifreq *)data;
3889 int err, mask;
3891 err = 0;
3892 switch (command) {
3893 case SIOCSIFADDR:
3894 case SIOCGIFADDR:
3895 err = ether_ioctl(ifp, command, data);
3896 break;
3898 case SIOCSIFMTU:
3899 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3900 break;
3902 case SIOCSIFFLAGS:
3903 mtx_lock(&sc->driver_mtx);
3904 if (sc->dying) {
3905 mtx_unlock(&sc->driver_mtx);
3906 return EINVAL;
3908 if (ifp->if_flags & IFF_UP) {
3909 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3910 err = mxge_open(sc);
3911 } else {
3912 /* take care of promisc and allmulti
3913 flag changes */
3914 mxge_change_promisc(sc,
3915 ifp->if_flags & IFF_PROMISC);
3916 mxge_set_multicast_list(sc);
3918 } else {
3919 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3920 mxge_close(sc);
3923 mtx_unlock(&sc->driver_mtx);
3924 break;
3926 case SIOCADDMULTI:
3927 case SIOCDELMULTI:
3928 mtx_lock(&sc->driver_mtx);
3929 mxge_set_multicast_list(sc);
3930 mtx_unlock(&sc->driver_mtx);
3931 break;
3933 case SIOCSIFCAP:
3934 mtx_lock(&sc->driver_mtx);
3935 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3936 if (mask & IFCAP_TXCSUM) {
3937 if (IFCAP_TXCSUM & ifp->if_capenable) {
3938 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3939 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3940 | CSUM_TSO);
3941 } else {
3942 ifp->if_capenable |= IFCAP_TXCSUM;
3943 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3945 } else if (mask & IFCAP_RXCSUM) {
3946 if (IFCAP_RXCSUM & ifp->if_capenable) {
3947 ifp->if_capenable &= ~IFCAP_RXCSUM;
3948 sc->csum_flag = 0;
3949 } else {
3950 ifp->if_capenable |= IFCAP_RXCSUM;
3951 sc->csum_flag = 1;
3954 if (mask & IFCAP_TSO4) {
3955 if (IFCAP_TSO4 & ifp->if_capenable) {
3956 ifp->if_capenable &= ~IFCAP_TSO4;
3957 ifp->if_hwassist &= ~CSUM_TSO;
3958 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
3959 ifp->if_capenable |= IFCAP_TSO4;
3960 ifp->if_hwassist |= CSUM_TSO;
3961 } else {
3962 printf("mxge requires tx checksum offload"
3963 " be enabled to use TSO\n");
3964 err = EINVAL;
3967 if (mask & IFCAP_LRO) {
3968 if (IFCAP_LRO & ifp->if_capenable)
3969 err = mxge_change_lro_locked(sc, 0);
3970 else
3971 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
3973 if (mask & IFCAP_VLAN_HWTAGGING)
3974 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3975 mtx_unlock(&sc->driver_mtx);
3976 VLAN_CAPABILITIES(ifp);
3978 break;
3980 case SIOCGIFMEDIA:
3981 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3982 &sc->media, command);
3983 break;
3985 default:
3986 err = ENOTTY;
3988 return err;
3991 static void
3992 mxge_fetch_tunables(mxge_softc_t *sc)
3995 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
3996 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
3997 &mxge_flow_control);
3998 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
3999 &mxge_intr_coal_delay);
4000 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4001 &mxge_nvidia_ecrc_enable);
4002 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4003 &mxge_force_firmware);
4004 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4005 &mxge_deassert_wait);
4006 TUNABLE_INT_FETCH("hw.mxge.verbose",
4007 &mxge_verbose);
4008 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4009 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4010 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4011 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4012 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4013 if (sc->lro_cnt != 0)
4014 mxge_lro_cnt = sc->lro_cnt;
4016 if (bootverbose)
4017 mxge_verbose = 1;
4018 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4019 mxge_intr_coal_delay = 30;
4020 if (mxge_ticks == 0)
4021 mxge_ticks = hz / 2;
4022 sc->pause = mxge_flow_control;
4023 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4024 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4025 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4027 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4028 mxge_initial_mtu < ETHER_MIN_LEN)
4029 mxge_initial_mtu = ETHERMTU_JUMBO;
4033 static void
4034 mxge_free_slices(mxge_softc_t *sc)
4036 struct mxge_slice_state *ss;
4037 int i;
4040 if (sc->ss == NULL)
4041 return;
4043 for (i = 0; i < sc->num_slices; i++) {
4044 ss = &sc->ss[i];
4045 if (ss->fw_stats != NULL) {
4046 mxge_dma_free(&ss->fw_stats_dma);
4047 ss->fw_stats = NULL;
4048 #ifdef IFNET_BUF_RING
4049 if (ss->tx.br != NULL) {
4050 drbr_free(ss->tx.br, M_DEVBUF);
4051 ss->tx.br = NULL;
4053 #endif
4054 mtx_destroy(&ss->tx.mtx);
4056 if (ss->rx_done.entry != NULL) {
4057 mxge_dma_free(&ss->rx_done.dma);
4058 ss->rx_done.entry = NULL;
4061 free(sc->ss, M_DEVBUF);
4062 sc->ss = NULL;
4065 static int
4066 mxge_alloc_slices(mxge_softc_t *sc)
4068 mxge_cmd_t cmd;
4069 struct mxge_slice_state *ss;
4070 size_t bytes;
4071 int err, i, max_intr_slots;
4073 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4074 if (err != 0) {
4075 device_printf(sc->dev, "Cannot determine rx ring size\n");
4076 return err;
4078 sc->rx_ring_size = cmd.data0;
4079 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4081 bytes = sizeof (*sc->ss) * sc->num_slices;
4082 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4083 if (sc->ss == NULL)
4084 return (ENOMEM);
4085 for (i = 0; i < sc->num_slices; i++) {
4086 ss = &sc->ss[i];
4088 ss->sc = sc;
4090 /* allocate per-slice rx interrupt queues */
4092 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4093 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4094 if (err != 0)
4095 goto abort;
4096 ss->rx_done.entry = ss->rx_done.dma.addr;
4097 bzero(ss->rx_done.entry, bytes);
4099 /*
4100 * allocate the per-slice firmware stats; stats
4101 * (including tx) are used only on the first
4102 * slice for now
4103 */
4104 #ifndef IFNET_BUF_RING
4105 if (i > 0)
4106 continue;
4107 #endif
4109 bytes = sizeof (*ss->fw_stats);
4110 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4111 sizeof (*ss->fw_stats), 64);
4112 if (err != 0)
4113 goto abort;
4114 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4115 snprintf(ss->tx.lock_name, sizeof(ss->tx.lock_name),
4116 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4117 lock_init(&ss->tx.lock, ss->tx.lock_name, 0, LK_CANRECURSE);
4118 #ifdef IFNET_BUF_RING
4119 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4120 &ss->tx.mtx);
4121 #endif
4124 return (0);
4126 abort:
4127 mxge_free_slices(sc);
4128 return (ENOMEM);
4131 static void
4132 mxge_slice_probe(mxge_softc_t *sc)
4134 mxge_cmd_t cmd;
4135 char *old_fw;
4136 int msix_cnt, status, max_intr_slots;
4138 sc->num_slices = 1;
4139 /*
4140 * don't enable multiple slices if the tunable disabled them,
4141 * or if this is not an SMP system
4142 */
4144 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4145 return;
4147 /* see how many MSI-X interrupts are available */
4148 msix_cnt = pci_msix_count(sc->dev);
4149 if (msix_cnt < 2)
4150 return;
4152 /* now load the slice aware firmware to see what it supports */
4153 old_fw = sc->fw_name;
4154 if (old_fw == mxge_fw_aligned)
4155 sc->fw_name = mxge_fw_rss_aligned;
4156 else
4157 sc->fw_name = mxge_fw_rss_unaligned;
4158 status = mxge_load_firmware(sc, 0);
4159 if (status != 0) {
4160 device_printf(sc->dev, "Falling back to a single slice\n");
4161 return;
4164 /* try to send a reset command to the card to see if it
4165 is alive */
4166 memset(&cmd, 0, sizeof (cmd));
4167 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4168 if (status != 0) {
4169 device_printf(sc->dev, "failed reset\n");
4170 goto abort_with_fw;
4173 /* get rx ring size */
4174 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4175 if (status != 0) {
4176 device_printf(sc->dev, "Cannot determine rx ring size\n");
4177 goto abort_with_fw;
4179 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4181 /* tell it the size of the interrupt queues */
4182 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4183 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4184 if (status != 0) {
4185 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4186 goto abort_with_fw;
4189 /* ask the maximum number of slices it supports */
4190 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4191 if (status != 0) {
4192 device_printf(sc->dev,
4193 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4194 goto abort_with_fw;
4196 sc->num_slices = cmd.data0;
4197 if (sc->num_slices > msix_cnt)
4198 sc->num_slices = msix_cnt;
4200 if (mxge_max_slices == -1) {
4201 /* cap to number of CPUs in system */
4202 if (sc->num_slices > mp_ncpus)
4203 sc->num_slices = mp_ncpus;
4204 } else {
4205 if (sc->num_slices > mxge_max_slices)
4206 sc->num_slices = mxge_max_slices;
4208 /* make sure it is a power of two */
4209 while (sc->num_slices & (sc->num_slices - 1))
4210 sc->num_slices--;
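	/* n & (n - 1) is non-zero exactly when n is not a power of two,
	   so e.g. num_slices == 5 decrements once to 4 (4 & 3 == 0) */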
	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
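
/*
 * Allocate one MSI-X vector per slice and hook each one up to
 * mxge_intr() with its slice as the argument.  The MSI-X table lives
 * behind BAR(2) on these NICs, so that BAR must stay mapped while the
 * vectors are in use.  If fewer vectors than slices are granted, the
 * whole allocation is backed out; the hw.mxge.max_slices tunable
 * (printed in the error message below) can then be set to the granted
 * count, e.g. hw.mxge.max_slices="4" in the loader configuration.
 */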
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		/* this M_NOWAIT allocation was previously unchecked */
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
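
/*
 * Single-interrupt fallback: use a one-message MSI when the device
 * offers one (resource id 1), otherwise share the legacy INTx line
 * (resource id 0).  sc->legacy_irq records which path was taken so
 * the error and teardown paths release the right rid.
 */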
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
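
/*
 * Undo mxge_add_msix_irqs() in reverse order: the interrupt handlers
 * first, then the IRQ resources, the BAR(2) table mapping, and
 * finally the MSI-X allocation itself.
 */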
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}
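
/*
 * The add/remove wrappers below pick the MSI-X or single-interrupt
 * path based on the slice count that mxge_slice_probe() settled on.
 */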
static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* the leading 0 deliberately disables this block; it appears to
	   be a leftover aid for exercising the MSI-X teardown/re-add path */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
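
/*
 * Attach: create the parent DMA tag, map the NIC's SRAM, load and
 * reset the firmware, size the slices, allocate rings and interrupts,
 * and finally publish the ifnet.  Each abort_with_* label below
 * unwinds exactly the state established before the corresponding
 * failure point, in reverse order.
 */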
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC, 	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_lock_name, sizeof(sc->cmd_lock_name), "%s:cmd",
		 device_get_nameunit(dev));
	lock_init(&sc->cmd_lock, sc->cmd_lock_name, 0, LK_CANRECURSE);
	snprintf(sc->driver_lock_name, sizeof(sc->driver_lock_name),
		 "%s:drv", device_get_nameunit(dev));
	lock_init(&sc->driver_lock, sc->driver_lock_name,
		  0, LK_CANRECURSE);

	/* XXX callout_init_mtx() and sc->driver_mtx are FreeBSD-isms;
	   the callout loses its lock association at this stage of the
	   port */
	callout_init(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		/* XXX was abort_with_dmabench, which leaked the slices */
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d. Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	/* cmd_lock and driver_lock are lockmgr locks in this port, so
	   release them with lockuninit() rather than mtx_destroy() */
	lockuninit(&sc->cmd_lock);
	lockuninit(&sc->driver_lock);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}
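
/*
 * Detach: refuse while vlans are still configured on the interface,
 * mark the driver as dying, close the device if it is running, and
 * release everything mxge_attach() set up, in reverse order.
 */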
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	sc->dying = 1;
	/* XXX if_drv_flags/IFF_DRV_RUNNING are FreeBSD-isms; DragonFly
	   keeps the running bit in if_flags */
	if (sc->ifp->if_flags & IFF_RUNNING)
		mxge_close(sc);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	ether_ifdetach(sc->ifp);
	/* XXX DragonFly has no callout_drain(); callout_stop() does not
	   wait for a handler that is already running */
	callout_stop(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	lockuninit(&sc->cmd_lock);
	lockuninit(&sc->driver_lock);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}
/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/