/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);
static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
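/*
 * A minimal usage sketch, not from the original driver: allocate a
 * 4KB, 4KB-aligned coherent block with mxge_dma_alloc() and release
 * it with mxge_dma_free().  "example_dma" is a hypothetical local,
 * named only for illustration.
 */
#if 0
	{
		mxge_dma_t example_dma;

		if (mxge_dma_alloc(sc, &example_dma, 4096, 4096) == 0) {
			/* example_dma.addr is the kernel mapping,
			   example_dma.bus_addr is what the NIC DMAs to */
			mxge_dma_free(&example_dma);
		}
	}
#endif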
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
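/*
 * Worked example, not in the original source: for the eeprom string
 * "MAC=00:60:dd:43:21:10", the parser advances ptr by 1 (onto 'A')
 * and then by 3 on each loop pass, so it lands on "00", "60", "dd",
 * and so on in turn; strtoul(ptr, NULL, 16) stops at each ':' by
 * itself, yielding the six mac_addr[] octets.
 */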
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
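/*
 * Worked example of the bandwidth math above, not from the original
 * source: with len = 4096 and a firmware result of cmd.data0 =
 * 0x00c80400, the upper half (0x00c8 = 200 transfers) and lower half
 * (0x0400 = 1024 half-microsecond ticks) give
 * read_dma = (200 * 4096 * 2) / 1024 = 1600, i.e. roughly 1.6 GB/s
 * as reported by the "read_dma_MBs" sysctl.
 */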
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL) {
		status = ENOMEM;
		goto abort_with_zs;
	}
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	lockmgr(&sc->cmd_lock, LK_EXCLUSIVE);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	lockmgr(&sc->cmd_lock, LK_RELEASE);
	return err;
}
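/*
 * A minimal usage sketch, not from the original source: callers fill
 * in cmd.data0..data2 as command-specific arguments, and some
 * commands return a value back in cmd.data0 (this exact call appears
 * in mxge_reset() below).
 */
#if 0
	{
		mxge_cmd_t cmd;
		int status;

		cmd.data0 = sc->rx_ring_size;	/* argument in data0 */
		status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	}
#endif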
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ". For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
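/*
 * Packing example, not in the original source: for the MAC address
 * 00:60:dd:43:21:10 the code above produces data0 = 0x0060dd43 (the
 * first four octets, most significant first) and data1 = 0x2110 (the
 * last two octets).
 */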
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
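/*
 * Hedged note, not from the original source: on a typical 4KB-page
 * configuration (assuming MJUMPAGESIZE == 4096), page-sized buffers
 * minus MXGEFW_PAD cannot hold a full firmware-limit jumbo frame, so
 * the first test fails and the function instead asks the firmware
 * whether it can chain several page-sized buffers per frame.
 */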
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}
static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_pause(sc, enabled);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}
static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}
static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
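/*
 * Example, not in the original source: the firmware keeps its stats
 * block in network byte order, so a counter holding 1 reads as raw
 * 0x01000000 on a little-endian host; the handler above presents it
 * to sysctl as be32toh(0x01000000) == 1.
 */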
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = device_get_sysctl_ctx(sc->dev);
	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "firmware_version",
		       CTLFLAG_RD, &sc->fw_version,
		       0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "serial_number",
		       CTLFLAG_RD, &sc->serial_number_string,
		       0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
		       "product_code",
		       CTLFLAG_RD, &sc->product_code_string,
		       0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control for this interface");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		sprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
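/*
 * Worked example, not from the original source: for a 3-descriptor
 * request starting at idx 10 in a 1024-entry ring, the forward path
 * copies descriptors 0-1 into slots 10-11 with one 32-byte PIO write,
 * the trailer copies descriptor 2 into slot 12, and only then is the
 * saved "valid" flags word written back into slot 10, so the NIC
 * never sees a partially written chain.
 */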
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
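/*
 * Layout sketch, not from the original source: M_PREPEND makes room
 * for the 4-byte 802.1Q encapsulation, the 12 bytes of dst/src MAC
 * addresses are slid forward over it by the bcopy() above, and then
 * evl_encap_proto/evl_tag fill the gap:
 *
 *   before: | dst | src | type | payload |
 *   after:  | dst | src | 0x8100 | tag | type | payload |
 */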
1960 static void
1961 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
1963 mxge_softc_t *sc;
1964 mcp_kreq_ether_send_t *req;
1965 bus_dma_segment_t *seg;
1966 struct mbuf *m_tmp;
1967 struct ifnet *ifp;
1968 mxge_tx_ring_t *tx;
1969 struct ip *ip;
1970 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
1971 uint16_t pseudo_hdr_offset;
1972 uint8_t flags, cksum_offset;
1975 sc = ss->sc;
1976 ifp = sc->ifp;
1977 tx = &ss->tx;
1979 ip_off = sizeof (struct ether_header);
1980 #ifdef MXGE_NEW_VLAN_API
1981 if (m->m_flags & M_VLANTAG) {
1982 m = mxge_vlan_tag_insert(m);
1983 if (__predict_false(m == NULL))
1984 goto drop;
1985 ip_off += ETHER_VLAN_ENCAP_LEN;
1987 #endif
1988 /* (try to) map the frame for DMA */
1989 idx = tx->req & tx->mask;
1990 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
1991 m, tx->seg_list, &cnt,
1992 BUS_DMA_NOWAIT);
1993 if (__predict_false(err == EFBIG)) {
1994 /* Too many segments in the chain. Try
1995 to defrag */
1996 m_tmp = m_defrag(m, M_NOWAIT);
1997 if (m_tmp == NULL) {
1998 goto drop;
2000 ss->tx.defrag++;
2001 m = m_tmp;
2002 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2003 tx->info[idx].map,
2004 m, tx->seg_list, &cnt,
2005 BUS_DMA_NOWAIT);
2007 if (__predict_false(err != 0)) {
2008 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2009 " packet len = %d\n", err, m->m_pkthdr.len);
2010 goto drop;
2012 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2013 BUS_DMASYNC_PREWRITE);
2014 tx->info[idx].m = m;
2016 #if IFCAP_TSO4
2017 /* TSO is different enough, we handle it in another routine */
2018 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2019 mxge_encap_tso(ss, m, cnt, ip_off);
2020 return;
2022 #endif
2024 req = tx->req_list;
2025 cksum_offset = 0;
2026 pseudo_hdr_offset = 0;
2027 flags = MXGEFW_FLAGS_NO_TSO;
2029 /* checksum offloading? */
2030 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2031 /* ensure ip header is in first mbuf, copy
2032 it to a scratch buffer if not */
2033 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2034 m_copydata(m, 0, ip_off + sizeof (*ip),
2035 ss->scratch);
2036 ip = (struct ip *)(ss->scratch + ip_off);
2037 } else {
2038 ip = (struct ip *)(mtod(m, char *) + ip_off);
2040 cksum_offset = ip_off + (ip->ip_hl << 2);
2041 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2042 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2043 req->cksum_offset = cksum_offset;
2044 flags |= MXGEFW_FLAGS_CKSUM;
2045 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2046 } else {
2047 odd_flag = 0;
2049 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2050 flags |= MXGEFW_FLAGS_SMALL;
2052 /* convert segments into a request list */
2053 cum_len = 0;
2054 seg = tx->seg_list;
2055 req->flags = MXGEFW_FLAGS_FIRST;
2056 for (i = 0; i < cnt; i++) {
2057 req->addr_low =
2058 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2059 req->addr_high =
2060 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2061 req->length = htobe16(seg->ds_len);
2062 req->cksum_offset = cksum_offset;
2063 if (cksum_offset > seg->ds_len)
2064 cksum_offset -= seg->ds_len;
2065 else
2066 cksum_offset = 0;
2067 req->pseudo_hdr_offset = pseudo_hdr_offset;
2068 req->pad = 0; /* complete solid 16-byte block */
2069 req->rdma_count = 1;
2070 req->flags |= flags | ((cum_len & 1) * odd_flag);
2071 cum_len += seg->ds_len;
2072 seg++;
2073 req++;
2074 req->flags = 0;
2076 req--;
2077 /* pad runts to 60 bytes */
2078 if (cum_len < 60) {
2079 req++;
2080 req->addr_low =
2081 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2082 req->addr_high =
2083 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2084 req->length = htobe16(60 - cum_len);
2085 req->cksum_offset = 0;
2086 req->pseudo_hdr_offset = pseudo_hdr_offset;
2087 req->pad = 0; /* complete solid 16-byte block */
2088 req->rdma_count = 1;
2089 req->flags |= flags | ((cum_len & 1) * odd_flag);
2090 cnt++;
2093 tx->req_list[0].rdma_count = cnt;
2094 #if 0
2095 /* print what the firmware will see */
2096 for (i = 0; i < cnt; i++) {
2097 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2098 "cso:%d, flags:0x%x, rdma:%d\n",
2099 i, (int)ntohl(tx->req_list[i].addr_high),
2100 (int)ntohl(tx->req_list[i].addr_low),
2101 (int)ntohs(tx->req_list[i].length),
2102 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2103 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2104 tx->req_list[i].rdma_count);
2106 printf("--------------\n");
2107 #endif
2108 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2109 mxge_submit_req(tx, tx->req_list, cnt);
2110 #ifdef IFNET_BUF_RING
2111 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2112 /* tell the NIC to start polling this slice */
2113 *tx->send_go = 1;
2114 tx->queue_active = 1;
2115 tx->activate++;
2116 wmb();
2118 #endif
2119 return;
2121 drop:
2122 m_freem(m);
2123 ss->oerrors++;
2124 return;
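/*
 * Shape of what mxge_encap() just submitted: each descriptor carries
 * a 64-bit DMA address split into 32-bit halves, a length, and the
 * common flags.  Only the first descriptor is marked
 * MXGEFW_FLAGS_FIRST, and its rdma_count is patched to the total
 * descriptor count just before mxge_submit_req().  Runts get one
 * extra descriptor pointing at the pre-zeroed sc->zeropad_dma block,
 * so the frame is padded to the 60-byte ethernet minimum without
 * touching the mbuf.
 */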
2127 #ifdef IFNET_BUF_RING
2128 static void
2129 mxge_qflush(struct ifnet *ifp)
2131 mxge_softc_t *sc = ifp->if_softc;
2132 mxge_tx_ring_t *tx;
2133 struct mbuf *m;
2134 int slice;
2136 for (slice = 0; slice < sc->num_slices; slice++) {
2137 tx = &sc->ss[slice].tx;
2138 lockmgr(&tx->lock, LK_EXCLUSIVE);
2139 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2140 m_freem(m);
2141 lockmgr(&tx->lock, LK_RELEASE);
2143 if_qflush(ifp);
2146 static inline void
2147 mxge_start_locked(struct mxge_slice_state *ss)
2149 mxge_softc_t *sc;
2150 struct mbuf *m;
2151 struct ifnet *ifp;
2152 mxge_tx_ring_t *tx;
2154 sc = ss->sc;
2155 ifp = sc->ifp;
2156 tx = &ss->tx;
2158 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2159 m = drbr_dequeue(ifp, tx->br);
2160 if (m == NULL) {
2161 return;
2163 /* let BPF see it */
2164 BPF_MTAP(ifp, m);
2166 /* give it to the nic */
2167 mxge_encap(ss, m);
2169 /* ran out of transmit slots */
2170 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2171 && (!drbr_empty(ifp, tx->br))) {
2172 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2173 tx->stall++;
2177 static int
2178 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2180 mxge_softc_t *sc;
2181 struct ifnet *ifp;
2182 mxge_tx_ring_t *tx;
2183 int err;
2185 sc = ss->sc;
2186 ifp = sc->ifp;
2187 tx = &ss->tx;
2189 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2190 IFF_DRV_RUNNING) {
2191 err = drbr_enqueue(ifp, tx->br, m);
2192 return (err);
2195 if (drbr_empty(ifp, tx->br) &&
2196 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2197 /* let BPF see it */
2198 BPF_MTAP(ifp, m);
2199 /* give it to the nic */
2200 mxge_encap(ss, m);
2201 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2202 return (err);
2204 if (!drbr_empty(ifp, tx->br))
2205 mxge_start_locked(ss);
2206 return (0);
2209 static int
2210 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2212 mxge_softc_t *sc = ifp->if_softc;
2213 struct mxge_slice_state *ss;
2214 mxge_tx_ring_t *tx;
2215 int err = 0;
2216 int slice;
2218 slice = m->m_pkthdr.flowid;
2219 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2221 ss = &sc->ss[slice];
2222 tx = &ss->tx;
2224 if (lockmgr(&tx->lock, LK_EXCLUSIVE|LK_NOWAIT) == 0) {
2225 err = mxge_transmit_locked(ss, m);
2226 lockmgr(&tx->lock, LK_RELEASE);
2227 } else {
2228 err = drbr_enqueue(ifp, tx->br, m);
2231 return (err);
2234 #else
2236 static inline void
2237 mxge_start_locked(struct mxge_slice_state *ss)
2239 mxge_softc_t *sc;
2240 struct mbuf *m;
2241 struct ifnet *ifp;
2242 mxge_tx_ring_t *tx;
2244 sc = ss->sc;
2245 ifp = sc->ifp;
2246 tx = &ss->tx;
2247 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2248 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2249 if (m == NULL) {
2250 return;
2252 /* let BPF see it */
2253 BPF_MTAP(ifp, m);
2255 /* give it to the nic */
2256 mxge_encap(ss, m);
2258 /* ran out of transmit slots */
2259 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2260 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2261 tx->stall++;
2264 #endif
2265 static void
2266 mxge_start(struct ifnet *ifp)
2268 mxge_softc_t *sc = ifp->if_softc;
2269 struct mxge_slice_state *ss;
2271 /* only use the first slice for now */
2272 ss = &sc->ss[0];
2273 lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
2274 mxge_start_locked(ss);
2275 lockmgr(&ss->tx.lock, LK_RELEASE);
2279 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2280 * at most 32 bytes at a time, so as to avoid involving the software
2281 * pio handler in the nic. We re-write the first segment's low
2282 * DMA address to mark it valid only after we write the entire chunk
2283 * in a burst
2285 static inline void
2286 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2287 mcp_kreq_ether_recv_t *src)
2289 uint32_t low;
2291 low = src->addr_low;
2292 src->addr_low = 0xffffffff;
2293 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2294 wmb();
2295 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2296 wmb();
2297 src->addr_low = low;
2298 dst->addr_low = low;
2299 wmb();
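/*
 * A sketch of the valid-marking protocol above: the firmware treats
 * a receive ring entry as valid only once its addr_low is not
 * 0xffffffff.  Poisoning src->addr_low before the two 32-byte bursts
 * and rewriting the real value last (dst->addr_low = low) guarantees
 * the firmware never observes a partially written group of eight
 * entries, even though the PIO copies themselves are not atomic.
 */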
2302 static int
2303 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2305 bus_dma_segment_t seg;
2306 struct mbuf *m;
2307 mxge_rx_ring_t *rx = &ss->rx_small;
2308 int cnt, err;
2310 m = m_gethdr(M_DONTWAIT, MT_DATA);
2311 if (m == NULL) {
2312 rx->alloc_fail++;
2313 err = ENOBUFS;
2314 goto done;
2316 m->m_len = MHLEN;
2317 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2318 &seg, &cnt, BUS_DMA_NOWAIT);
2319 if (err != 0) {
2320 m_free(m);
2321 goto done;
2323 rx->info[idx].m = m;
2324 rx->shadow[idx].addr_low =
2325 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2326 rx->shadow[idx].addr_high =
2327 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2329 done:
2330 if ((idx & 7) == 7)
2331 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2332 return err;
2335 static int
2336 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2338 bus_dma_segment_t seg[3];
2339 struct mbuf *m;
2340 mxge_rx_ring_t *rx = &ss->rx_big;
2341 int cnt, err, i;
2343 if (rx->cl_size == MCLBYTES)
2344 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2345 else
2346 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2347 if (m == NULL) {
2348 rx->alloc_fail++;
2349 err = ENOBUFS;
2350 goto done;
2352 m->m_len = rx->mlen;
2353 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2354 seg, &cnt, BUS_DMA_NOWAIT);
2355 if (err != 0) {
2356 m_free(m);
2357 goto done;
2359 rx->info[idx].m = m;
2360 rx->shadow[idx].addr_low =
2361 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2362 rx->shadow[idx].addr_high =
2363 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2365 #if MXGE_VIRT_JUMBOS
2366 for (i = 1; i < cnt; i++) {
2367 rx->shadow[idx + i].addr_low =
2368 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2369 rx->shadow[idx + i].addr_high =
2370 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2372 #endif
2374 done:
2375 for (i = 0; i < rx->nbufs; i++) {
2376 if ((idx & 7) == 7) {
2377 mxge_submit_8rx(&rx->lanai[idx - 7],
2378 &rx->shadow[idx - 7]);
2380 idx++;
2382 return err;
2386 * Myri10GE hardware checksums are not valid if the sender
2387 * padded the frame with non-zero padding. This is because
2388 * the firmware just does a simple 16-bit 1s complement
2389 * checksum across the entire frame, excluding the first 14
2390 * bytes. It is best to simply check the checksum and
2391 * tell the stack about it only if the checksum is good
2394 static inline uint16_t
2395 mxge_rx_csum(struct mbuf *m, int csum)
2397 struct ether_header *eh;
2398 struct ip *ip;
2399 uint16_t c;
2401 eh = mtod(m, struct ether_header *);
2403 /* only deal with IPv4 TCP & UDP for now */
2404 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2405 return 1;
2406 ip = (struct ip *)(eh + 1);
2407 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2408 ip->ip_p != IPPROTO_UDP))
2409 return 1;
2410 #ifdef INET
2411 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2412 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2413 (ip->ip_hl << 2) + ip->ip_p));
2414 #else
2415 c = 1;
2416 #endif
2417 c ^= 0xffff;
2418 return (c);
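/*
 * Why the xor above validates the frame: the NIC's csum is the
 * ones-complement sum of everything past the 14-byte ethernet
 * header.  A well-formed IP header sums to 0xffff (ones-complement
 * zero), so what remains is effectively the sum of the TCP/UDP
 * segment; in_pseudo() folds in the pseudo-header (addresses,
 * protocol, and the segment length, ip_len minus the header length).
 * If the transport checksum in the packet is good, the grand total
 * folds to 0xffff and c ^= 0xffff yields 0.  Any nonzero return
 * simply means "cannot vouch for this frame".
 */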
2421 static void
2422 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2424 struct ether_vlan_header *evl;
2425 struct ether_header *eh;
2426 uint32_t partial;
2428 evl = mtod(m, struct ether_vlan_header *);
2429 eh = mtod(m, struct ether_header *);
2432 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2433 * after what the firmware thought was the end of the ethernet
2434 * header.
2437 /* put checksum into host byte order */
2438 *csum = ntohs(*csum);
2439 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2440 (*csum) += ~partial;
2441 (*csum) += ((*csum) < ~partial);
2442 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2443 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2445 /* restore checksum to network byte order;
2446 later consumers expect this */
2447 *csum = htons(*csum);
2449 /* save the tag */
2450 #ifdef MXGE_NEW_VLAN_API
2451 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2452 #else
2454 struct m_tag *mtag;
2455 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2456 M_NOWAIT);
2457 if (mtag == NULL)
2458 return;
2459 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2460 m_tag_prepend(m, mtag);
2463 #endif
2464 m->m_flags |= M_VLANTAG;
2467 * Remove the 802.1q header by copying the Ethernet
2468 * addresses over it and adjusting the beginning of
2469 * the data in the mbuf. The encapsulated Ethernet
2470 * type field is already in place.
2472 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2473 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2474 m_adj(m, ETHER_VLAN_ENCAP_LEN);
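/*
 * The arithmetic above is ones-complement checksum surgery: to
 * subtract the four bytes the firmware summed but which disappear
 * along with the tag, add the complement of that 32-bit word
 * (~partial), propagate the carry, then fold the 32-bit accumulator
 * down to 16 bits twice.  This keeps the firmware's partial checksum
 * consistent with the shortened frame without recomputing anything.
 */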
2478 static inline void
2479 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2481 mxge_softc_t *sc;
2482 struct ifnet *ifp;
2483 struct mbuf *m;
2484 struct ether_header *eh;
2485 mxge_rx_ring_t *rx;
2486 bus_dmamap_t old_map;
2487 int idx;
2488 uint16_t tcpudp_csum;
2490 sc = ss->sc;
2491 ifp = sc->ifp;
2492 rx = &ss->rx_big;
2493 idx = rx->cnt & rx->mask;
2494 rx->cnt += rx->nbufs;
2495 /* save a pointer to the received mbuf */
2496 m = rx->info[idx].m;
2497 /* try to replace the received mbuf */
2498 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2499 /* drop the frame -- the old mbuf is re-cycled */
2500 ifp->if_ierrors++;
2501 return;
2504 /* unmap the received buffer */
2505 old_map = rx->info[idx].map;
2506 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2507 bus_dmamap_unload(rx->dmat, old_map);
2509 /* swap the bus_dmamap_t's */
2510 rx->info[idx].map = rx->extra_map;
2511 rx->extra_map = old_map;
2513 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2514 * aligned */
2515 m->m_data += MXGEFW_PAD;
2517 m->m_pkthdr.rcvif = ifp;
2518 m->m_len = m->m_pkthdr.len = len;
2519 ss->ipackets++;
2520 eh = mtod(m, struct ether_header *);
2521 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2522 mxge_vlan_tag_remove(m, &csum);
2524 /* if the checksum is valid, mark it in the mbuf header */
2525 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2526 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2527 return;
2528 /* otherwise, it was a UDP frame, or a TCP frame which
2529 we could not do LRO on. Tell the stack that the
2530 checksum is good */
2531 m->m_pkthdr.csum_data = 0xffff;
2532 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2534 /* flowid only valid if RSS hashing is enabled */
2535 if (sc->num_slices > 1) {
2536 m->m_pkthdr.flowid = (ss - sc->ss);
2537 m->m_flags |= M_FLOWID;
2539 /* pass the frame up the stack */
2540 (*ifp->if_input)(ifp, m);
2543 static inline void
2544 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2546 mxge_softc_t *sc;
2547 struct ifnet *ifp;
2548 struct ether_header *eh;
2549 struct mbuf *m;
2550 mxge_rx_ring_t *rx;
2551 bus_dmamap_t old_map;
2552 int idx;
2553 uint16_t tcpudp_csum;
2555 sc = ss->sc;
2556 ifp = sc->ifp;
2557 rx = &ss->rx_small;
2558 idx = rx->cnt & rx->mask;
2559 rx->cnt++;
2560 /* save a pointer to the received mbuf */
2561 m = rx->info[idx].m;
2562 /* try to replace the received mbuf */
2563 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2564 /* drop the frame -- the old mbuf is re-cycled */
2565 ifp->if_ierrors++;
2566 return;
2569 /* unmap the received buffer */
2570 old_map = rx->info[idx].map;
2571 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2572 bus_dmamap_unload(rx->dmat, old_map);
2574 /* swap the bus_dmamap_t's */
2575 rx->info[idx].map = rx->extra_map;
2576 rx->extra_map = old_map;
2578 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2579 * aligned */
2580 m->m_data += MXGEFW_PAD;
2582 m->m_pkthdr.rcvif = ifp;
2583 m->m_len = m->m_pkthdr.len = len;
2584 ss->ipackets++;
2585 eh = mtod(m, struct ether_header *);
2586 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2587 mxge_vlan_tag_remove(m, &csum);
2589 /* if the checksum is valid, mark it in the mbuf header */
2590 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2591 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2592 return;
2593 /* otherwise, it was a UDP frame, or a TCP frame which
2594 we could not do LRO on. Tell the stack that the
2595 checksum is good */
2596 m->m_pkthdr.csum_data = 0xffff;
2597 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2599 /* flowid only valid if RSS hashing is enabled */
2600 if (sc->num_slices > 1) {
2601 m->m_pkthdr.flowid = (ss - sc->ss);
2602 m->m_flags |= M_FLOWID;
2604 /* pass the frame up the stack */
2605 (*ifp->if_input)(ifp, m);
2608 static inline void
2609 mxge_clean_rx_done(struct mxge_slice_state *ss)
2611 mxge_rx_done_t *rx_done = &ss->rx_done;
2612 int limit = 0;
2613 uint16_t length;
2614 uint16_t checksum;
2617 while (rx_done->entry[rx_done->idx].length != 0) {
2618 length = ntohs(rx_done->entry[rx_done->idx].length);
2619 rx_done->entry[rx_done->idx].length = 0;
2620 checksum = rx_done->entry[rx_done->idx].checksum;
2621 if (length <= (MHLEN - MXGEFW_PAD))
2622 mxge_rx_done_small(ss, length, checksum);
2623 else
2624 mxge_rx_done_big(ss, length, checksum);
2625 rx_done->cnt++;
2626 rx_done->idx = rx_done->cnt & rx_done->mask;
2628 /* limit potential for livelock */
2629 if (__predict_false(++limit > rx_done->mask / 2))
2630 break;
2632 #ifdef INET
2633 while (!SLIST_EMPTY(&ss->lro_active)) {
2634 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2635 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2636 mxge_lro_flush(ss, lro);
2638 #endif
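/*
 * rx_done is a simple producer/consumer ring shared with the
 * firmware: the firmware marks an entry valid by writing a nonzero
 * length, and the host consumes it and writes the length back to
 * zero.  Processing at most mask/2 entries per call bounds the time
 * spent in interrupt context under receive flood (livelock
 * avoidance); whatever remains is picked up on the next interrupt.
 * Active LRO state is flushed afterwards so no merged segments are
 * held across interrupts.
 */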
2642 static inline void
2643 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2645 struct ifnet *ifp;
2646 mxge_tx_ring_t *tx;
2647 struct mbuf *m;
2648 bus_dmamap_t map;
2649 int idx;
2650 int *flags;
2652 tx = &ss->tx;
2653 ifp = ss->sc->ifp;
2654 while (tx->pkt_done != mcp_idx) {
2655 idx = tx->done & tx->mask;
2656 tx->done++;
2657 m = tx->info[idx].m;
2658 /* mbuf and DMA map only attached to the first
2659 segment per-mbuf */
2660 if (m != NULL) {
2661 ss->obytes += m->m_pkthdr.len;
2662 if (m->m_flags & M_MCAST)
2663 ss->omcasts++;
2664 ss->opackets++;
2665 tx->info[idx].m = NULL;
2666 map = tx->info[idx].map;
2667 bus_dmamap_unload(tx->dmat, map);
2668 m_freem(m);
2670 if (tx->info[idx].flag) {
2671 tx->info[idx].flag = 0;
2672 tx->pkt_done++;
2676 /* If we have space, clear IFF_OACTIVE to tell the stack that
2677 it's OK to send packets */
2678 #ifdef IFNET_BUF_RING
2679 flags = &ss->if_drv_flags;
2680 #else
2681 flags = &ifp->if_drv_flags;
2682 #endif
2683 lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
2684 if ((*flags) & IFF_DRV_OACTIVE &&
2685 tx->req - tx->done < (tx->mask + 1)/4) {
2686 *(flags) &= ~IFF_DRV_OACTIVE;
2687 ss->tx.wake++;
2688 mxge_start_locked(ss);
2690 #ifdef IFNET_BUF_RING
2691 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2692 /* let the NIC stop polling this queue, since there
2693 * are no more transmits pending */
2694 if (tx->req == tx->done) {
2695 *tx->send_stop = 1;
2696 tx->queue_active = 0;
2697 tx->deactivate++;
2698 wmb();
2701 #endif
2702 lockmgr(&ss->tx.lock, LK_RELEASE);
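/*
 * Completion bookkeeping: the firmware reports a running packet
 * count (mcp_idx), not a descriptor index, so we walk descriptors
 * forward until pkt_done catches up, using the per-slot flag set at
 * submit time to mark where each packet's descriptors end.  The mbuf
 * and DMA map were attached only to a packet's first descriptor, so
 * only those slots get unloaded and freed.  IFF_DRV_OACTIVE is
 * cleared only once in-flight descriptors drop below a quarter of
 * the ring, giving some hysteresis before the stack is restarted.
 */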
2706 static struct mxge_media_type mxge_xfp_media_types[] =
2708 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2709 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2710 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2711 {0, (1 << 5), "10GBASE-ER"},
2712 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2713 {0, (1 << 3), "10GBASE-SW"},
2714 {0, (1 << 2), "10GBASE-LW"},
2715 {0, (1 << 1), "10GBASE-EW"},
2716 {0, (1 << 0), "Reserved"}
2718 static struct mxge_media_type mxge_sfp_media_types[] =
2720 {0, (1 << 7), "Reserved"},
2721 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2722 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2723 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2726 static void
2727 mxge_set_media(mxge_softc_t *sc, int type)
2729 sc->media_flags |= type;
2730 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2731 ifmedia_set(&sc->media, sc->media_flags);
2736 * Determine the media type for a NIC. Some XFPs will identify
2737 * themselves only when their link is up, so this is initiated via a
2738 * link up interrupt. However, this can potentially take up to
2739 * several milliseconds, so it is run via the watchdog routine, rather
2740 * than in the interrupt handler itself. This need only be done
2741 * once, not each time the link is up.
2743 static void
2744 mxge_media_probe(mxge_softc_t *sc)
2746 mxge_cmd_t cmd;
2747 char *cage_type;
2748 char *ptr;
2749 struct mxge_media_type *mxge_media_types = NULL;
2750 int i, err, ms, mxge_media_type_entries;
2751 uint32_t byte;
2753 sc->need_media_probe = 0;
2755 /* if we've already set a media type, we're done */
2756 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2757 return;
2760 * parse the product code to determine the interface type
2761 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2762 * after the 3rd dash in the driver's cached copy of the
2763 * EEPROM's product code string.
2765 ptr = sc->product_code_string;
2766 if (ptr == NULL) {
2767 device_printf(sc->dev, "Missing product code\n");
2770 for (i = 0; i < 3; i++, ptr++) {
2771 ptr = index(ptr, '-');
2772 if (ptr == NULL) {
2773 device_printf(sc->dev,
2774 "only %d dashes in PC?!?\n", i);
2775 return;
2778 if (*ptr == 'C') {
2779 /* -C is CX4 */
2780 mxge_set_media(sc, IFM_10G_CX4);
2781 return;
2783 else if (*ptr == 'Q') {
2784 /* -Q is Quad Ribbon Fiber */
2785 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2786 /* FreeBSD has no media type for Quad ribbon fiber */
2787 return;
2790 if (*ptr == 'R') {
2791 /* -R is XFP */
2792 mxge_media_types = mxge_xfp_media_types;
2793 mxge_media_type_entries =
2794 sizeof (mxge_xfp_media_types) /
2795 sizeof (mxge_xfp_media_types[0]);
2796 byte = MXGE_XFP_COMPLIANCE_BYTE;
2797 cage_type = "XFP";
2800 if (*ptr == 'S' || *(ptr + 1) == 'S') {
2801 /* -S or -2S is SFP+ */
2802 mxge_media_types = mxge_sfp_media_types;
2803 mxge_media_type_entries =
2804 sizeof (mxge_sfp_media_types) /
2805 sizeof (mxge_sfp_media_types[0]);
2806 cage_type = "SFP+";
2807 byte = 3;
2810 if (mxge_media_types == NULL) {
2811 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2812 return;
2816 * At this point we know the NIC has an XFP cage, so now we
2817 * try to determine what is in the cage by using the
2818 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2819 * register. We read just one byte, which may take over
2820 * a millisecond
2823 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2824 cmd.data1 = byte;
2825 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2826 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2827 device_printf(sc->dev, "failed to read XFP\n");
2829 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2830 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2832 if (err != MXGEFW_CMD_OK) {
2833 return;
2836 /* now we wait for the data to be cached */
2837 cmd.data0 = byte;
2838 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2839 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2840 DELAY(1000);
2841 cmd.data0 = byte;
2842 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2844 if (err != MXGEFW_CMD_OK) {
2845 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2846 cage_type, err, ms);
2847 return;
2850 if (cmd.data0 == mxge_media_types[0].bitmask) {
2851 if (mxge_verbose)
2852 device_printf(sc->dev, "%s:%s\n", cage_type,
2853 mxge_media_types[0].name);
2854 mxge_set_media(sc, IFM_10G_CX4);
2855 return;
2857 for (i = 1; i < mxge_media_type_entries; i++) {
2858 if (cmd.data0 & mxge_media_types[i].bitmask) {
2859 if (mxge_verbose)
2860 device_printf(sc->dev, "%s:%s\n",
2861 cage_type,
2862 mxge_media_types[i].name);
2864 mxge_set_media(sc, mxge_media_types[i].flag);
2865 return;
2868 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2869 cmd.data0);
2871 return;
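/*
 * Worked example of the parsing above, using a hypothetical product
 * code "10G-PCIE-8A-R": the loop advances past three dashes and
 * lands on 'R', which selects the XFP media table and the XFP
 * compliance byte; a trailing -C would have short-circuited to CX4,
 * and -S or -2S would select the SFP+ table with byte 3 instead.
 */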
2874 static void
2875 mxge_intr(void *arg)
2877 struct mxge_slice_state *ss = arg;
2878 mxge_softc_t *sc = ss->sc;
2879 mcp_irq_data_t *stats = ss->fw_stats;
2880 mxge_tx_ring_t *tx = &ss->tx;
2881 mxge_rx_done_t *rx_done = &ss->rx_done;
2882 uint32_t send_done_count;
2883 uint8_t valid;
2886 #ifndef IFNET_BUF_RING
2887 /* an interrupt on a non-zero slice is implicitly valid
2888 since MSI-X irqs are not shared */
2889 if (ss != sc->ss) {
2890 mxge_clean_rx_done(ss);
2891 *ss->irq_claim = be32toh(3);
2892 return;
2894 #endif
2896 /* make sure the DMA has finished */
2897 if (!stats->valid) {
2898 return;
2900 valid = stats->valid;
2902 if (sc->legacy_irq) {
2903 /* lower legacy IRQ */
2904 *sc->irq_deassert = 0;
2905 if (!mxge_deassert_wait)
2906 /* don't wait for confirmation that irq is low */
2907 stats->valid = 0;
2908 } else {
2909 stats->valid = 0;
2912 /* loop while waiting for legacy irq deassertion */
2913 do {
2914 /* check for transmit completes and receives */
2915 send_done_count = be32toh(stats->send_done_count);
2916 while ((send_done_count != tx->pkt_done) ||
2917 (rx_done->entry[rx_done->idx].length != 0)) {
2918 if (send_done_count != tx->pkt_done)
2919 mxge_tx_done(ss, (int)send_done_count);
2920 mxge_clean_rx_done(ss);
2921 send_done_count = be32toh(stats->send_done_count);
2923 if (sc->legacy_irq && mxge_deassert_wait)
2924 wmb();
2925 } while (*((volatile uint8_t *) &stats->valid));
2927 /* fw link & error stats meaningful only on the first slice */
2928 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2929 if (sc->link_state != stats->link_up) {
2930 sc->link_state = stats->link_up;
2931 if (sc->link_state) {
2932 sc->ifp->if_link_state = LINK_STATE_UP;
2933 if_link_state_change(sc->ifp);
2934 if (mxge_verbose)
2935 device_printf(sc->dev, "link up\n");
2936 } else {
2937 sc->ifp->if_link_state = LINK_STATE_DOWN;
2938 if_link_state_change(sc->ifp);
2939 if (mxge_verbose)
2940 device_printf(sc->dev, "link down\n");
2942 sc->need_media_probe = 1;
2944 if (sc->rdma_tags_available !=
2945 be32toh(stats->rdma_tags_available)) {
2946 sc->rdma_tags_available =
2947 be32toh(stats->rdma_tags_available);
2948 device_printf(sc->dev, "RDMA timed out! %d tags "
2949 "left\n", sc->rdma_tags_available);
2952 if (stats->link_down) {
2953 sc->down_cnt += stats->link_down;
2954 sc->link_state = 0;
2955 if_link_state_change(sc->ifp, LINK_STATE_DOWN);
2959 /* check to see if we have rx token to pass back */
2960 if (valid & 0x1)
2961 *ss->irq_claim = be32toh(3);
2962 *(ss->irq_claim + 1) = be32toh(3);
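/*
 * Interrupt handshake, roughly: stats->valid is DMA'd up by the
 * firmware and acts as the work-pending latch.  For legacy
 * interrupts we write irq_deassert and, if mxge_deassert_wait is
 * set, keep polling until the firmware clears valid to confirm the
 * line is low; MSI/MSI-X vectors are never shared, so the latch is
 * cleared immediately.  Writing 3 to the irq_claim words hands the
 * interrupt tokens back so the firmware may raise the next one.
 */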
2965 static void
2966 mxge_init(void *arg)
2972 static void
2973 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2975 struct lro_entry *lro_entry;
2976 int i;
2978 while (!SLIST_EMPTY(&ss->lro_free)) {
2979 lro_entry = SLIST_FIRST(&ss->lro_free);
2980 SLIST_REMOVE_HEAD(&ss->lro_free, next);
2981 kfree(lro_entry, M_DEVBUF);
2984 for (i = 0; i <= ss->rx_big.mask; i++) {
2985 if (ss->rx_big.info[i].m == NULL)
2986 continue;
2987 bus_dmamap_unload(ss->rx_big.dmat,
2988 ss->rx_big.info[i].map);
2989 m_freem(ss->rx_big.info[i].m);
2990 ss->rx_big.info[i].m = NULL;
2993 for (i = 0; i <= ss->rx_small.mask; i++) {
2994 if (ss->rx_small.info[i].m == NULL)
2995 continue;
2996 bus_dmamap_unload(ss->rx_small.dmat,
2997 ss->rx_small.info[i].map);
2998 m_freem(ss->rx_small.info[i].m);
2999 ss->rx_small.info[i].m = NULL;
3002 /* transmit ring used only on the first slice */
3003 if (ss->tx.info == NULL)
3004 return;
3006 for (i = 0; i <= ss->tx.mask; i++) {
3007 ss->tx.info[i].flag = 0;
3008 if (ss->tx.info[i].m == NULL)
3009 continue;
3010 bus_dmamap_unload(ss->tx.dmat,
3011 ss->tx.info[i].map);
3012 m_freem(ss->tx.info[i].m);
3013 ss->tx.info[i].m = NULL;
3017 static void
3018 mxge_free_mbufs(mxge_softc_t *sc)
3020 int slice;
3022 for (slice = 0; slice < sc->num_slices; slice++)
3023 mxge_free_slice_mbufs(&sc->ss[slice]);
3026 static void
3027 mxge_free_slice_rings(struct mxge_slice_state *ss)
3029 int i;
3032 if (ss->rx_done.entry != NULL)
3033 mxge_dma_free(&ss->rx_done.dma);
3034 ss->rx_done.entry = NULL;
3036 if (ss->tx.req_bytes != NULL)
3037 kfree(ss->tx.req_bytes, M_DEVBUF);
3038 ss->tx.req_bytes = NULL;
3040 if (ss->tx.seg_list != NULL)
3041 kfree(ss->tx.seg_list, M_DEVBUF);
3042 ss->tx.seg_list = NULL;
3044 if (ss->rx_small.shadow != NULL)
3045 kfree(ss->rx_small.shadow, M_DEVBUF);
3046 ss->rx_small.shadow = NULL;
3048 if (ss->rx_big.shadow != NULL)
3049 kfree(ss->rx_big.shadow, M_DEVBUF);
3050 ss->rx_big.shadow = NULL;
3052 if (ss->tx.info != NULL) {
3053 if (ss->tx.dmat != NULL) {
3054 for (i = 0; i <= ss->tx.mask; i++) {
3055 bus_dmamap_destroy(ss->tx.dmat,
3056 ss->tx.info[i].map);
3058 bus_dma_tag_destroy(ss->tx.dmat);
3060 kfree(ss->tx.info, M_DEVBUF);
3062 ss->tx.info = NULL;
3064 if (ss->rx_small.info != NULL) {
3065 if (ss->rx_small.dmat != NULL) {
3066 for (i = 0; i <= ss->rx_small.mask; i++) {
3067 bus_dmamap_destroy(ss->rx_small.dmat,
3068 ss->rx_small.info[i].map);
3070 bus_dmamap_destroy(ss->rx_small.dmat,
3071 ss->rx_small.extra_map);
3072 bus_dma_tag_destroy(ss->rx_small.dmat);
3074 kfree(ss->rx_small.info, M_DEVBUF);
3076 ss->rx_small.info = NULL;
3078 if (ss->rx_big.info != NULL) {
3079 if (ss->rx_big.dmat != NULL) {
3080 for (i = 0; i <= ss->rx_big.mask; i++) {
3081 bus_dmamap_destroy(ss->rx_big.dmat,
3082 ss->rx_big.info[i].map);
3084 bus_dmamap_destroy(ss->rx_big.dmat,
3085 ss->rx_big.extra_map);
3086 bus_dma_tag_destroy(ss->rx_big.dmat);
3088 kfree(ss->rx_big.info, M_DEVBUF);
3090 ss->rx_big.info = NULL;
3093 static void
3094 mxge_free_rings(mxge_softc_t *sc)
3096 int slice;
3098 for (slice = 0; slice < sc->num_slices; slice++)
3099 mxge_free_slice_rings(&sc->ss[slice]);
3102 static int
3103 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3104 int tx_ring_entries)
3106 mxge_softc_t *sc = ss->sc;
3107 size_t bytes;
3108 int err, i;
3110 err = ENOMEM;
3112 /* allocate per-slice receive resources */
3114 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3115 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3117 /* allocate the rx shadow rings */
3118 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3119 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3120 if (ss->rx_small.shadow == NULL)
3121 return err;
3123 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3124 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3125 if (ss->rx_big.shadow == NULL)
3126 return err;
3128 /* allocate the rx host info rings */
3129 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3130 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3131 if (ss->rx_small.info == NULL)
3132 return err;
3134 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3135 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3136 if (ss->rx_big.info == NULL)
3137 return err;
3139 /* allocate the rx busdma resources */
3140 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3141 1, /* alignment */
3142 4096, /* boundary */
3143 BUS_SPACE_MAXADDR, /* low */
3144 BUS_SPACE_MAXADDR, /* high */
3145 NULL, NULL, /* filter */
3146 MHLEN, /* maxsize */
3147 1, /* num segs */
3148 MHLEN, /* maxsegsize */
3149 BUS_DMA_ALLOCNOW, /* flags */
3150 NULL, NULL, /* lock */
3151 &ss->rx_small.dmat); /* tag */
3152 if (err != 0) {
3153 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3154 err);
3155 return err;
3158 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3159 1, /* alignment */
3160 #if MXGE_VIRT_JUMBOS
3161 4096, /* boundary */
3162 #else
3163 0, /* boundary */
3164 #endif
3165 BUS_SPACE_MAXADDR, /* low */
3166 BUS_SPACE_MAXADDR, /* high */
3167 NULL, NULL, /* filter */
3168 3*4096, /* maxsize */
3169 #if MXGE_VIRT_JUMBOS
3170 3, /* num segs */
3171 4096, /* maxsegsize*/
3172 #else
3173 1, /* num segs */
3174 MJUM9BYTES, /* maxsegsize*/
3175 #endif
3176 BUS_DMA_ALLOCNOW, /* flags */
3177 NULL, NULL, /* lock */
3178 &ss->rx_big.dmat); /* tag */
3179 if (err != 0) {
3180 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3181 err);
3182 return err;
3184 for (i = 0; i <= ss->rx_small.mask; i++) {
3185 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3186 &ss->rx_small.info[i].map);
3187 if (err != 0) {
3188 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3189 err);
3190 return err;
3193 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3194 &ss->rx_small.extra_map);
3195 if (err != 0) {
3196 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3197 err);
3198 return err;
3201 for (i = 0; i <= ss->rx_big.mask; i++) {
3202 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3203 &ss->rx_big.info[i].map);
3204 if (err != 0) {
3205 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3206 err);
3207 return err;
3210 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3211 &ss->rx_big.extra_map);
3212 if (err != 0) {
3213 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3214 err);
3215 return err;
3218 /* now allocate TX resources */
3220 #ifndef IFNET_BUF_RING
3221 /* only use a single TX ring for now */
3222 if (ss != ss->sc->ss)
3223 return 0;
3224 #endif
3226 ss->tx.mask = tx_ring_entries - 1;
3227 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3230 /* allocate the tx request copy block */
3231 bytes = 8 +
3232 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3233 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3234 if (ss->tx.req_bytes == NULL)
3235 return err;
3236 /* ensure req_list entries are aligned to 8 bytes */
3237 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3238 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3240 /* allocate the tx busdma segment list */
3241 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3242 ss->tx.seg_list = (bus_dma_segment_t *)
3243 kmalloc(bytes, M_DEVBUF, M_WAITOK);
3244 if (ss->tx.seg_list == NULL)
3245 return err;
3247 /* allocate the tx host info ring */
3248 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3249 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3250 if (ss->tx.info == NULL)
3251 return err;
3253 /* allocate the tx busdma resources */
3254 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3255 1, /* alignment */
3256 sc->tx_boundary, /* boundary */
3257 BUS_SPACE_MAXADDR, /* low */
3258 BUS_SPACE_MAXADDR, /* high */
3259 NULL, NULL, /* filter */
3260 65536 + 256, /* maxsize */
3261 ss->tx.max_desc - 2, /* num segs */
3262 sc->tx_boundary, /* maxsegsz */
3263 BUS_DMA_ALLOCNOW, /* flags */
3264 NULL, NULL, /* lock */
3265 &ss->tx.dmat); /* tag */
3267 if (err != 0) {
3268 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3269 err);
3270 return err;
3273 /* now use these tags to setup dmamaps for each slot
3274 in the ring */
3275 for (i = 0; i <= ss->tx.mask; i++) {
3276 err = bus_dmamap_create(ss->tx.dmat, 0,
3277 &ss->tx.info[i].map);
3278 if (err != 0) {
3279 device_printf(sc->dev, "Err %d tx dmamap\n",
3280 err);
3281 return err;
3284 return 0;
3288 static int
3289 mxge_alloc_rings(mxge_softc_t *sc)
3291 mxge_cmd_t cmd;
3292 int tx_ring_size;
3293 int tx_ring_entries, rx_ring_entries;
3294 int err, slice;
3296 /* get ring sizes */
3297 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3298 tx_ring_size = cmd.data0;
3299 if (err != 0) {
3300 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3301 goto abort;
3304 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3305 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3306 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3307 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3308 IFQ_SET_READY(&sc->ifp->if_snd);
3310 for (slice = 0; slice < sc->num_slices; slice++) {
3311 err = mxge_alloc_slice_rings(&sc->ss[slice],
3312 rx_ring_entries,
3313 tx_ring_entries);
3314 if (err != 0)
3315 goto abort;
3317 return 0;
3319 abort:
3320 mxge_free_rings(sc);
3321 return err;
3326 static void
3327 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3329 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3331 if (bufsize < MCLBYTES) {
3332 /* easy, everything fits in a single buffer */
3333 *big_buf_size = MCLBYTES;
3334 *cl_size = MCLBYTES;
3335 *nbufs = 1;
3336 return;
3339 if (bufsize < MJUMPAGESIZE) {
3340 /* still easy, everything still fits in a single buffer */
3341 *big_buf_size = MJUMPAGESIZE;
3342 *cl_size = MJUMPAGESIZE;
3343 *nbufs = 1;
3344 return;
3346 #if MXGE_VIRT_JUMBOS
3347 /* now we need to use virtually contiguous buffers */
3348 *cl_size = MJUM9BYTES;
3349 *big_buf_size = 4096;
3350 *nbufs = mtu / 4096 + 1;
3351 /* needs to be a power of two, so round up */
3352 if (*nbufs == 3)
3353 *nbufs = 4;
3354 #else
3355 *cl_size = MJUM9BYTES;
3356 *big_buf_size = MJUM9BYTES;
3357 *nbufs = 1;
3358 #endif
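/*
 * Worked example: a standard 1500-byte MTU needs 1500 + 14
 * (ETHER_HDR_LEN) + 4 (ETHER_VLAN_ENCAP_LEN) + 2 (MXGEFW_PAD) = 1520
 * bytes, which fits in one MCLBYTES (2KB) cluster.  A 9000-byte
 * jumbo MTU needs 9020 bytes, which overflows MJUMPAGESIZE, so it
 * falls through to the MJUM9BYTES case: one 9KB cluster, or with
 * MXGE_VIRT_JUMBOS a 9KB cluster presented to the NIC as
 * 9000/4096 + 1 = 3 page-sized buffers, rounded up to 4 (a power
 * of two).
 */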
3361 static int
3362 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3364 mxge_softc_t *sc;
3365 mxge_cmd_t cmd;
3366 bus_dmamap_t map;
3367 struct lro_entry *lro_entry;
3368 int err, i, slice;
3371 sc = ss->sc;
3372 slice = ss - sc->ss;
3374 SLIST_INIT(&ss->lro_free);
3375 SLIST_INIT(&ss->lro_active);
3377 for (i = 0; i < sc->lro_cnt; i++) {
3378 lro_entry = (struct lro_entry *)
3379 kmalloc(sizeof (*lro_entry), M_DEVBUF,
3380 M_NOWAIT | M_ZERO);
3381 if (lro_entry == NULL) {
3382 sc->lro_cnt = i;
3383 break;
3385 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3387 /* get the lanai pointers to the send and receive rings */
3389 err = 0;
3390 #ifndef IFNET_BUF_RING
3391 /* We currently only send from the first slice */
3392 if (slice == 0) {
3393 #endif
3394 cmd.data0 = slice;
3395 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3396 ss->tx.lanai =
3397 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3398 ss->tx.send_go = (volatile uint32_t *)
3399 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3400 ss->tx.send_stop = (volatile uint32_t *)
3401 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3402 #ifndef IFNET_BUF_RING
3404 #endif
3405 cmd.data0 = slice;
3406 err |= mxge_send_cmd(sc,
3407 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3408 ss->rx_small.lanai =
3409 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3410 cmd.data0 = slice;
3411 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3412 ss->rx_big.lanai =
3413 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3415 if (err != 0) {
3416 device_printf(sc->dev,
3417 "failed to get ring sizes or locations\n");
3418 return EIO;
3421 /* stock receive rings */
3422 for (i = 0; i <= ss->rx_small.mask; i++) {
3423 map = ss->rx_small.info[i].map;
3424 err = mxge_get_buf_small(ss, map, i);
3425 if (err) {
3426 device_printf(sc->dev, "alloced %d/%d smalls\n",
3427 i, ss->rx_small.mask + 1);
3428 return ENOMEM;
3431 for (i = 0; i <= ss->rx_big.mask; i++) {
3432 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3433 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3435 ss->rx_big.nbufs = nbufs;
3436 ss->rx_big.cl_size = cl_size;
3437 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3438 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3439 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3440 map = ss->rx_big.info[i].map;
3441 err = mxge_get_buf_big(ss, map, i);
3442 if (err) {
3443 device_printf(sc->dev, "alloced %d/%d bigs\n",
3444 i, ss->rx_big.mask + 1);
3445 return ENOMEM;
3448 return 0;
3451 static int
3452 mxge_open(mxge_softc_t *sc)
3454 mxge_cmd_t cmd;
3455 int err, big_bytes, nbufs, slice, cl_size, i;
3456 bus_addr_t bus;
3457 volatile uint8_t *itable;
3458 struct mxge_slice_state *ss;
3460 /* Copy the MAC address in case it was overridden */
3461 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3463 err = mxge_reset(sc, 1);
3464 if (err != 0) {
3465 device_printf(sc->dev, "failed to reset\n");
3466 return EIO;
3469 if (sc->num_slices > 1) {
3470 /* setup the indirection table */
3471 cmd.data0 = sc->num_slices;
3472 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3473 &cmd);
3475 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3476 &cmd);
3477 if (err != 0) {
3478 device_printf(sc->dev,
3479 "failed to setup rss tables\n");
3480 return err;
3483 /* just enable an identity mapping */
3484 itable = sc->sram + cmd.data0;
3485 for (i = 0; i < sc->num_slices; i++)
3486 itable[i] = (uint8_t)i;
3488 cmd.data0 = 1;
3489 cmd.data1 = mxge_rss_hash_type;
3490 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3491 if (err != 0) {
3492 device_printf(sc->dev, "failed to enable slices\n");
3493 return err;
3498 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3500 cmd.data0 = nbufs;
3501 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3502 &cmd);
3503 /* error is only meaningful if we're trying to set
3504 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3505 if (err && nbufs > 1) {
3506 device_printf(sc->dev,
3507 "Failed to set always-use-n to %d\n",
3508 nbufs);
3509 return EIO;
3511 /* Give the firmware the mtu and the big and small buffer
3512 sizes. The firmware wants the big buf size to be a power
3513 of two. Luckily, FreeBSD's clusters are powers of two */
3514 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3515 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3516 cmd.data0 = MHLEN - MXGEFW_PAD;
3517 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3518 &cmd);
3519 cmd.data0 = big_bytes;
3520 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3522 if (err != 0) {
3523 device_printf(sc->dev, "failed to setup params\n");
3524 goto abort;
3527 /* Now give the firmware the pointer to the stats block */
3528 for (slice = 0;
3529 #ifdef IFNET_BUF_RING
3530 slice < sc->num_slices;
3531 #else
3532 slice < 1;
3533 #endif
3534 slice++) {
3535 ss = &sc->ss[slice];
3536 cmd.data0 =
3537 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3538 cmd.data1 =
3539 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3540 cmd.data2 = sizeof(struct mcp_irq_data);
3541 cmd.data2 |= (slice << 16);
3542 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3545 if (err != 0) {
3546 bus = sc->ss->fw_stats_dma.bus_addr;
3547 bus += offsetof(struct mcp_irq_data, send_done_count);
3548 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3549 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3550 err = mxge_send_cmd(sc,
3551 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3552 &cmd);
3553 /* Firmware cannot support multicast without STATS_DMA_V2 */
3554 sc->fw_multicast_support = 0;
3555 } else {
3556 sc->fw_multicast_support = 1;
3559 if (err != 0) {
3560 device_printf(sc->dev, "failed to setup params\n");
3561 goto abort;
3564 for (slice = 0; slice < sc->num_slices; slice++) {
3565 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3566 if (err != 0) {
3567 device_printf(sc->dev, "couldn't open slice %d\n",
3568 slice);
3569 goto abort;
3573 /* Finally, start the firmware running */
3574 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3575 if (err) {
3576 device_printf(sc->dev, "Couldn't bring up link\n");
3577 goto abort;
3579 #ifdef IFNET_BUF_RING
3580 for (slice = 0; slice < sc->num_slices; slice++) {
3581 ss = &sc->ss[slice];
3582 ss->if_drv_flags |= IFF_DRV_RUNNING;
3583 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3585 #endif
3586 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3587 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3588 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3590 return 0;
3593 abort:
3594 mxge_free_mbufs(sc);
3596 return err;
3599 static int
3600 mxge_close(mxge_softc_t *sc)
3602 mxge_cmd_t cmd;
3603 int err, old_down_cnt;
3604 #ifdef IFNET_BUF_RING
3605 struct mxge_slice_state *ss;
3606 int slice;
3607 #endif
3609 callout_stop(&sc->co_hdl);
3610 #ifdef IFNET_BUF_RING
3611 for (slice = 0; slice < sc->num_slices; slice++) {
3612 ss = &sc->ss[slice];
3613 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3615 #endif
3616 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3617 old_down_cnt = sc->down_cnt;
3618 wmb();
3619 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3620 if (err) {
3621 device_printf(sc->dev, "Couldn't bring down link\n");
3623 if (old_down_cnt == sc->down_cnt) {
3624 /* wait for down irq */
3625 DELAY(10 * sc->intr_coal_delay);
3627 wmb();
3628 if (old_down_cnt == sc->down_cnt) {
3629 device_printf(sc->dev, "never got down irq\n");
3632 mxge_free_mbufs(sc);
3634 return 0;
3637 static void
3638 mxge_setup_cfg_space(mxge_softc_t *sc)
3640 device_t dev = sc->dev;
3641 int reg;
3642 uint16_t cmd, lnk, pectl;
3644 /* find the PCIe link width and set max read request to 4KB*/
3645 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3646 lnk = pci_read_config(dev, reg + 0x12, 2);
3647 sc->link_width = (lnk >> 4) & 0x3f;
3649 pectl = pci_read_config(dev, reg + 0x8, 2);
3650 pectl = (pectl & ~0x7000) | (5 << 12);
3651 pci_write_config(dev, reg + 0x8, pectl, 2);
3654 /* Enable DMA and Memory space access */
3655 pci_enable_busmaster(dev);
3656 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3657 cmd |= PCIM_CMD_MEMEN;
3658 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3661 static uint32_t
3662 mxge_read_reboot(mxge_softc_t *sc)
3664 device_t dev = sc->dev;
3665 uint32_t vs;
3667 /* find the vendor specific offset */
3668 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3669 device_printf(sc->dev,
3670 "could not find vendor specific offset\n");
3671 return (uint32_t)-1;
3673 /* enable read32 mode */
3674 pci_write_config(dev, vs + 0x10, 0x3, 1);
3675 /* tell NIC which register to read */
3676 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3677 return (pci_read_config(dev, vs + 0x14, 4));
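/*
 * The reboot status register is reached through Myricom's
 * vendor-specific PCI capability: writing 0x3 at vs + 0x10 enables
 * read32 mode, vs + 0x18 latches the device-internal address to
 * read (0xfffffff0 here), and vs + 0x14 returns the data.  The
 * watchdog uses this value to tell a firmware crash/reboot apart
 * from a merely wedged transmit ring.
 */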
3680 static int
3681 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3683 struct pci_devinfo *dinfo;
3684 mxge_tx_ring_t *tx;
3685 int err;
3686 uint32_t reboot;
3687 uint16_t cmd;
3689 err = ENXIO;
3691 device_printf(sc->dev, "Watchdog reset!\n");
3694 * check to see if the NIC rebooted. If it did, then all of
3695 * PCI config space has been reset, and things like the
3696 * busmaster bit will be zero. If this is the case, then we
3697 * must restore PCI config space before the NIC can be used
3698 * again
3700 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3701 if (cmd == 0xffff) {
3703 * maybe the watchdog caught the NIC rebooting; wait
3704 * up to 100ms for it to finish. If it does not come
3705 * back, then give up
3707 DELAY(1000*100);
3708 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3709 if (cmd == 0xffff) {
3710 device_printf(sc->dev, "NIC disappeared!\n");
3711 return (err);
3714 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3715 /* print the reboot status */
3716 reboot = mxge_read_reboot(sc);
3717 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3718 reboot);
3719 /* restore PCI configuration space */
3720 dinfo = device_get_ivars(sc->dev);
3721 pci_cfg_restore(sc->dev, dinfo);
3723 /* and redo any changes we made to our config space */
3724 mxge_setup_cfg_space(sc);
3726 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
3727 mxge_close(sc);
3728 err = mxge_open(sc);
3730 } else {
3731 tx = &sc->ss[slice].tx;
3732 device_printf(sc->dev,
3733 "NIC did not reboot, slice %d ring state:\n",
3734 slice);
3735 device_printf(sc->dev,
3736 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3737 tx->req, tx->done, tx->queue_active);
3738 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3739 tx->activate, tx->deactivate);
3740 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3741 tx->pkt_done,
3742 be32toh(sc->ss->fw_stats->send_done_count));
3743 device_printf(sc->dev, "not resetting\n");
3745 return (err);
3748 static int
3749 mxge_watchdog(mxge_softc_t *sc)
3751 mxge_tx_ring_t *tx;
3752 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3753 int i, err = 0;
3755 /* see if we have outstanding transmits, which
3756 have been pending for more than mxge_ticks */
3757 for (i = 0;
3758 #ifdef IFNET_BUF_RING
3759 (i < sc->num_slices) && (err == 0);
3760 #else
3761 (i < 1) && (err == 0);
3762 #endif
3763 i++) {
3764 tx = &sc->ss[i].tx;
3765 if (tx->req != tx->done &&
3766 tx->watchdog_req != tx->watchdog_done &&
3767 tx->done == tx->watchdog_done) {
3768 /* check for pause blocking before resetting */
3769 if (tx->watchdog_rx_pause == rx_pause)
3770 err = mxge_watchdog_reset(sc, i);
3771 else
3772 device_printf(sc->dev, "Flow control blocking "
3773 "xmits, check link partner\n");
3776 tx->watchdog_req = tx->req;
3777 tx->watchdog_done = tx->done;
3778 tx->watchdog_rx_pause = rx_pause;
3781 if (sc->need_media_probe)
3782 mxge_media_probe(sc);
3783 return (err);
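/*
 * Heuristic recap: a transmit ring is considered wedged only if it
 * has work outstanding (req != done), had work outstanding at the
 * previous tick (watchdog_req != watchdog_done), and made no
 * completion progress in between (done == watchdog_done).  Before
 * resetting, dropped_pause is compared with its last snapshot: if
 * pause frames arrived in the interim, the link partner's flow
 * control is throttling us and a NIC reset would not help.
 */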
3786 static void
3787 mxge_update_stats(mxge_softc_t *sc)
3789 struct mxge_slice_state *ss;
3790 u_long ipackets = 0;
3791 u_long opackets = 0;
3792 #ifdef IFNET_BUF_RING
3793 u_long obytes = 0;
3794 u_long omcasts = 0;
3795 u_long odrops = 0;
3796 #endif
3797 u_long oerrors = 0;
3798 int slice;
3800 for (slice = 0; slice < sc->num_slices; slice++) {
3801 ss = &sc->ss[slice];
3802 ipackets += ss->ipackets;
3803 opackets += ss->opackets;
3804 #ifdef IFNET_BUF_RING
3805 obytes += ss->obytes;
3806 omcasts += ss->omcasts;
3807 odrops += ss->tx.br->br_drops;
3808 #endif
3809 oerrors += ss->oerrors;
3811 sc->ifp->if_ipackets = ipackets;
3812 sc->ifp->if_opackets = opackets;
3813 #ifdef IFNET_BUF_RING
3814 sc->ifp->if_obytes = obytes;
3815 sc->ifp->if_omcasts = omcasts;
3816 sc->ifp->if_snd.ifq_drops = odrops;
3817 #endif
3818 sc->ifp->if_oerrors = oerrors;
3821 static void
3822 mxge_tick(void *arg)
3824 mxge_softc_t *sc = arg;
3825 int err = 0;
3827 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3828 /* aggregate stats from different slices */
3829 mxge_update_stats(sc);
3830 if (!sc->watchdog_countdown) {
3831 err = mxge_watchdog(sc);
3832 sc->watchdog_countdown = 4;
3834 sc->watchdog_countdown--;
3835 if (err == 0)
3836 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3837 lockmgr(&sc->driver_lock, LK_RELEASE);
3840 static int
3841 mxge_media_change(struct ifnet *ifp)
3843 return EINVAL;
3846 static int
3847 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3849 struct ifnet *ifp = sc->ifp;
3850 int real_mtu, old_mtu;
3851 int err = 0;
3854 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3855 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3856 return EINVAL;
3857 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3858 old_mtu = ifp->if_mtu;
3859 ifp->if_mtu = mtu;
3860 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3861 mxge_close(sc);
3862 err = mxge_open(sc);
3863 if (err != 0) {
3864 ifp->if_mtu = old_mtu;
3865 mxge_close(sc);
3866 (void) mxge_open(sc);
3869 lockmgr(&sc->driver_lock, LK_RELEASE);
3870 return err;
3873 static void
3874 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3876 mxge_softc_t *sc = ifp->if_softc;
3879 if (sc == NULL)
3880 return;
3881 ifmr->ifm_status = IFM_AVALID;
3882 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3883 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3884 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3887 static int
3888 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
3890 mxge_softc_t *sc = ifp->if_softc;
3891 struct ifreq *ifr = (struct ifreq *)data;
3892 int err, mask;
3894 err = 0;
3895 switch (command) {
3896 case SIOCSIFADDR:
3897 case SIOCGIFADDR:
3898 err = ether_ioctl(ifp, command, data);
3899 break;
3901 case SIOCSIFMTU:
3902 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3903 break;
3905 case SIOCSIFFLAGS:
3906 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3907 if (sc->dying) {
3908 lockmgr(&sc->driver_lock, LK_RELEASE);
3909 return EINVAL;
3911 if (ifp->if_flags & IFF_UP) {
3912 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3913 err = mxge_open(sc);
3914 } else {
3915 /* take care of promisc and allmulti
3916 flag changes */
3917 mxge_change_promisc(sc,
3918 ifp->if_flags & IFF_PROMISC);
3919 mxge_set_multicast_list(sc);
3921 } else {
3922 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3923 mxge_close(sc);
3926 lockmgr(&sc->driver_lock, LK_RELEASE);
3927 break;
3929 case SIOCADDMULTI:
3930 case SIOCDELMULTI:
3931 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3932 mxge_set_multicast_list(sc);
3933 lockmgr(&sc->driver_lock, LK_RELEASE);
3934 break;
3936 case SIOCSIFCAP:
3937 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3938 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3939 if (mask & IFCAP_TXCSUM) {
3940 if (IFCAP_TXCSUM & ifp->if_capenable) {
3941 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3942 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3943 | CSUM_TSO);
3944 } else {
3945 ifp->if_capenable |= IFCAP_TXCSUM;
3946 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3948 } else if (mask & IFCAP_RXCSUM) {
3949 if (IFCAP_RXCSUM & ifp->if_capenable) {
3950 ifp->if_capenable &= ~IFCAP_RXCSUM;
3951 sc->csum_flag = 0;
3952 } else {
3953 ifp->if_capenable |= IFCAP_RXCSUM;
3954 sc->csum_flag = 1;
3957 if (mask & IFCAP_TSO4) {
3958 if (IFCAP_TSO4 & ifp->if_capenable) {
3959 ifp->if_capenable &= ~IFCAP_TSO4;
3960 ifp->if_hwassist &= ~CSUM_TSO;
3961 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
3962 ifp->if_capenable |= IFCAP_TSO4;
3963 ifp->if_hwassist |= CSUM_TSO;
3964 } else {
3965 kprintf("mxge requires tx checksum offload"
3966 " be enabled to use TSO\n");
3967 err = EINVAL;
3970 if (mask & IFCAP_LRO) {
3971 if (IFCAP_LRO & ifp->if_capenable)
3972 err = mxge_change_lro_locked(sc, 0);
3973 else
3974 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
3976 if (mask & IFCAP_VLAN_HWTAGGING)
3977 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3978 lockmgr(&sc->driver_lock, LK_RELEASE);
3979 VLAN_CAPABILITIES(ifp);
3981 break;
3983 case SIOCGIFMEDIA:
3984 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3985 &sc->media, command);
3986 break;
3988 default:
3989 err = ENOTTY;
3991 return err;
3994 static void
3995 mxge_fetch_tunables(mxge_softc_t *sc)
3998 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
3999 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4000 &mxge_flow_control);
4001 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4002 &mxge_intr_coal_delay);
4003 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4004 &mxge_nvidia_ecrc_enable);
4005 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4006 &mxge_force_firmware);
4007 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4008 &mxge_deassert_wait);
4009 TUNABLE_INT_FETCH("hw.mxge.verbose",
4010 &mxge_verbose);
4011 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4012 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4013 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4014 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4015 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4016 if (sc->lro_cnt != 0)
4017 mxge_lro_cnt = sc->lro_cnt;
4019 if (bootverbose)
4020 mxge_verbose = 1;
4021 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4022 mxge_intr_coal_delay = 30;
4023 if (mxge_ticks == 0)
4024 mxge_ticks = hz / 2;
4025 sc->pause = mxge_flow_control;
4026 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4027 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4028 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4030 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4031 mxge_initial_mtu < ETHER_MIN_LEN)
4032 mxge_initial_mtu = ETHERMTU_JUMBO;
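/*
 * All of these knobs are fetched once at attach time, so they are
 * meant to be set from the loader environment, e.g. (illustrative
 * values):
 *
 *	hw.mxge.intr_coal_delay="30"	# usec of interrupt coalescing
 *	hw.mxge.max_slices="4"		# cap RSS slices; -1 means ncpus
 *	hw.mxge.flow_control_enabled="0"
 *
 * Out-of-range values are quietly reset to the defaults above rather
 * than rejected.
 */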
4036 static void
4037 mxge_free_slices(mxge_softc_t *sc)
4039 struct mxge_slice_state *ss;
4040 int i;
4043 if (sc->ss == NULL)
4044 return;
4046 for (i = 0; i < sc->num_slices; i++) {
4047 ss = &sc->ss[i];
4048 if (ss->fw_stats != NULL) {
4049 mxge_dma_free(&ss->fw_stats_dma);
4050 ss->fw_stats = NULL;
4051 #ifdef IFNET_BUF_RING
4052 if (ss->tx.br != NULL) {
4053 drbr_free(ss->tx.br, M_DEVBUF);
4054 ss->tx.br = NULL;
4056 #endif
4057 lockuninit(&ss->tx.lock);
4059 if (ss->rx_done.entry != NULL) {
4060 mxge_dma_free(&ss->rx_done.dma);
4061 ss->rx_done.entry = NULL;
4064 kfree(sc->ss, M_DEVBUF);
4065 sc->ss = NULL;
4068 static int
4069 mxge_alloc_slices(mxge_softc_t *sc)
4071 mxge_cmd_t cmd;
4072 struct mxge_slice_state *ss;
4073 size_t bytes;
4074 int err, i, max_intr_slots;
4076 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4077 if (err != 0) {
4078 device_printf(sc->dev, "Cannot determine rx ring size\n");
4079 return err;
4081 sc->rx_ring_size = cmd.data0;
4082 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4084 bytes = sizeof (*sc->ss) * sc->num_slices;
4085 sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4086 if (sc->ss == NULL)
4087 return (ENOMEM);
4088 for (i = 0; i < sc->num_slices; i++) {
4089 ss = &sc->ss[i];
4091 ss->sc = sc;
4093 /* allocate per-slice rx interrupt queues */
4095 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4096 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4097 if (err != 0)
4098 goto abort;
4099 ss->rx_done.entry = ss->rx_done.dma.addr;
4100 bzero(ss->rx_done.entry, bytes);
4103 * allocate the per-slice firmware stats; stats
4104 * (including tx) are used only on the first
4105 * slice for now
4107 #ifndef IFNET_BUF_RING
4108 if (i > 0)
4109 continue;
4110 #endif
4112 bytes = sizeof (*ss->fw_stats);
4113 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4114 sizeof (*ss->fw_stats), 64);
4115 if (err != 0)
4116 goto abort;
4117 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4118 ksnprintf(ss->tx.lock_name, sizeof(ss->tx.lock_name),
4119 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4120 lock_init(&ss->tx.lock, ss->tx.lock_name, 0, LK_CANRECURSE);
4121 #ifdef IFNET_BUF_RING
4122 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4123 &ss->tx.lock);
4124 #endif
4127 return (0);
4129 abort:
4130 mxge_free_slices(sc);
4131 return (ENOMEM);
4134 static void
4135 mxge_slice_probe(mxge_softc_t *sc)
4137 mxge_cmd_t cmd;
4138 char *old_fw;
4139 int msix_cnt, status, max_intr_slots;
4141 sc->num_slices = 1;
4143 * don't enable multiple slices if the tunable disables them,
4144 * or if this is not an SMP system
4147 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
4148 return;
4150 /* see how many MSI-X interrupts are available */
4151 msix_cnt = pci_msix_count(sc->dev);
4152 if (msix_cnt < 2)
4153 return;
4155 /* now load the slice-aware firmware to see what it supports */
4156 old_fw = sc->fw_name;
4157 if (old_fw == mxge_fw_aligned)
4158 sc->fw_name = mxge_fw_rss_aligned;
4159 else
4160 sc->fw_name = mxge_fw_rss_unaligned;
4161 status = mxge_load_firmware(sc, 0);
4162 if (status != 0) {
4163 device_printf(sc->dev, "Falling back to a single slice\n");
4164 return;
4167 /* try to send a reset command to the card to see if it
4168 is alive */
4169 memset(&cmd, 0, sizeof (cmd));
4170 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4171 if (status != 0) {
4172 device_printf(sc->dev, "failed reset\n");
4173 goto abort_with_fw;
4176 /* get rx ring size */
4177 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4178 if (status != 0) {
4179 device_printf(sc->dev, "Cannot determine rx ring size\n");
4180 goto abort_with_fw;
4182 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4184 /* tell it the size of the interrupt queues */
4185 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4186 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4187 if (status != 0) {
4188 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4189 goto abort_with_fw;
4192 /* ask the maximum number of slices it supports */
4193 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4194 if (status != 0) {
4195 device_printf(sc->dev,
4196 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4197 goto abort_with_fw;
4199 sc->num_slices = cmd.data0;
4200 if (sc->num_slices > msix_cnt)
4201 sc->num_slices = msix_cnt;
4203 if (mxge_max_slices == -1) {
4204 /* cap to number of CPUs in system */
4205 if (sc->num_slices > mp_ncpus)
4206 sc->num_slices = mp_ncpus;
4207 } else {
4208 if (sc->num_slices > mxge_max_slices)
4209 sc->num_slices = mxge_max_slices;
4211 /* make sure it is a power of two */
4212 while (sc->num_slices & (sc->num_slices - 1))
4213 sc->num_slices--;
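	/*
	 * The loop above rounds num_slices down to a power of two
	 * (e.g. a request for 6 slices becomes 4), presumably because
	 * the firmware's RSS hash spreads flows over a power-of-two
	 * number of queues.
	 */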
	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;
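	/*
	 * The MSI-X table lives behind BAR(2) on this hardware (hence
	 * msix_table_res below); the mapping must stay active for as
	 * long as the vectors are in use, so it is only released on
	 * the teardown paths.
	 */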
	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}
	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;
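	/*
	 * Resource ID convention: a single MSI message is rid 1, while
	 * the legacy INTx line is rid 0.  If MSI allocation fails we
	 * fall back to INTx and remember that in sc->legacy_irq so the
	 * teardown path releases the right rid.
	 */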
	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
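/*
 * The removal routines below mirror the allocation paths above in
 * reverse; mxge_rem_msix_irqs() is essentially the unwinding done by
 * mxge_add_msix_irqs()'s abort labels, run unconditionally.
 */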
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}
static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);
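	/*
	 * The "0 &&" keeps the branch below permanently disabled; it
	 * appears to be leftover debugging code that tore down and
	 * re-added the MSI-X vectors after a successful setup.
	 */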
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);
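	/*
	 * Parent DMA tag: the maxsize of 65536 + 256 leaves room for a
	 * full 64 KB TSO payload plus protocol headers (the extra 256
	 * bytes is presumably header slack).
	 */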
	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_lock_name, sizeof(sc->cmd_lock_name), "%s:cmd",
		 device_get_nameunit(dev));
	lock_init(&sc->cmd_lock, sc->cmd_lock_name, 0, LK_CANRECURSE);
	snprintf(sc->driver_lock_name, sizeof(sc->driver_lock_name),
		 "%s:drv", device_get_nameunit(dev));
	lock_init(&sc->driver_lock, sc->driver_lock_name,
		  0, LK_CANRECURSE);

	callout_init(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
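	/*
	 * The usable window is the 2 MB SRAM BAR minus what the
	 * firmware keeps for itself (2 x 48 KB plus 32 KB, presumably
	 * MCP text and scratch space) and a final 0x100 bytes.
	 */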
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/*
	 * make a NULL terminated copy of the EEPROM strings section
	 * of lanai SRAM
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d. Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	lockuninit(&sc->cmd_lock);
	lockuninit(&sc->driver_lock);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}
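/*
 * Detach runs the attach sequence in reverse: close the interface
 * under the driver lock, detach it from the network stack, then free
 * interrupts, rings, slices, DMA memory, and finally the PCI
 * resources and locks.
 */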
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	sc->dying = 1;
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	ether_ifdetach(sc->ifp);
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	lockuninit(&sc->cmd_lock);
	lockuninit(&sc->driver_lock);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
static int
mxge_shutdown(device_t dev)
{
	return 0;
}
/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/