get mxge to build, stage 14/many
[dragonfly.git] / sys / dev / netif / mxge / if_mxge.c
/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h>		/* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"

/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);
static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
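/*
 * Illustrative (hypothetical values) contents of the string block:
 *   "SN=123456\0MAC=00:60:dd:47:8a:2c\0PC=M3F2-PCIXE-8B\0\0"
 * mxge_parse_strings() below walks these NUL-terminated records and
 * pulls the MAC address out one hex byte at a time.
 */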
static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access.
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);
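	/* Worked example (illustrative values): with the ck804 base of
	 * 0xe0000000, bus 2, slot 0, function 0, this yields
	 * off = 0xe0000000 + 0x00100000 * 2 = 0xe0200000.
	 */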
	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
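	/* Worked example (hypothetical numbers): with len = 4096 and a
	 * returned cmd.data0 of 0x00640200, 0x64 (100) transfers of len
	 * bytes completed in 0x200 (512) half-microsecond ticks, so the
	 * bandwidth expression below gives
	 * (100 * 4096 * 2) / 512 = 1600, i.e. ~1600 MB/s.
	 */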
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
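/*
 * In short (a summary of the policy above): if the aligned-completion
 * test passes, or ECRC could be enabled, keep eth_z8e and a 4KB
 * tx_boundary; otherwise fall back to ethp_z8e and clamp tx_boundary
 * to 2KB.  mxge_firmware_probe() below implements this.
 */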
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

union qualhack
{
	const char *ro_char;
	char *rw_char;
};
static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;

}

static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	z_stream zs;
	char *inflate_buffer;
	const struct firmware *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_get(sc->fw_name);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}

	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}

	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (inflate_buffer + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_buffer;
	}
	hdr = (const void*)(inflate_buffer + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
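/*
 * Typical caller pattern for mxge_send_cmd() (illustrative; see
 * mxge_reset() below for a real use):
 *
 *	mxge_cmd_t cmd;
 *
 *	memset(&cmd, 0, sizeof (cmd));
 *	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
 *	if (status != 0)
 *		... handle error ...
 *
 * Inputs go in cmd.data0..data2; on success the firmware's 32-bit
 * response comes back byte-swapped in cmd.data0.
 */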
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	lockmgr(&sc->cmd_lock, LK_EXCLUSIVE);
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	lockmgr(&sc->cmd_lock, LK_RELEASE);
	return err;
}
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_pause(sc, enabled);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	return err;
}
static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}

static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
					  OID_AUTO,
					  device_get_nameunit(sc->dev),
					  CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */
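/* Writing the descriptors from the highest slot downwards means the
 * first slot -- whose valid flags mxge_submit_req() sets last -- is
 * never seen by the NIC ahead of the rest of the chain, even when the
 * copy wraps the ring (our reading of the scheme; see the comment on
 * mxge_submit_req() below). */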
static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
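	/* Illustrative reading of the bookkeeping below (hypothetical
	 * packet): while cum_len < 0 we are still emitting header
	 * descriptors; once the header ends rdma_count restarts, each
	 * payload descriptor bumps it, and at every MSS cut ("chop")
	 * the accumulated count is written back retroactively through
	 * (req - rdma_count)->rdma_count before counting begins again
	 * for the next TSO segment.
	 */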
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces */
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		kprintf("tx->max_desc exceeded via TSO!\n");
		kprintf("mss = %d, %ld, %d!\n", mss,
			(long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
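/*
 * Illustrative layout (tag value hypothetical): for a frame with
 * ether_vtag = 7, M_PREPEND makes room for ETHER_VLAN_ENCAP_LEN (4)
 * extra bytes, the 12-byte dst/src address pair is slid to the new
 * front of the mbuf, and the 4-byte gap left before the original type
 * field is filled with evl_encap_proto = htons(ETHERTYPE_VLAN)
 * (0x8100) and evl_tag = htons(7) -- the on-wire 802.1Q format.
 */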
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, MB_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
1970 static void
1971 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
1973 mxge_softc_t *sc;
1974 mcp_kreq_ether_send_t *req;
1975 bus_dma_segment_t *seg;
1976 struct mbuf *m_tmp;
1977 struct ifnet *ifp;
1978 mxge_tx_ring_t *tx;
1979 struct ip *ip;
1980 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
1981 uint16_t pseudo_hdr_offset;
1982 uint8_t flags, cksum_offset;
1985 sc = ss->sc;
1986 ifp = sc->ifp;
1987 tx = &ss->tx;
1989 ip_off = sizeof (struct ether_header);
1990 #ifdef MXGE_NEW_VLAN_API
1991 if (m->m_flags & M_VLANTAG) {
1992 m = mxge_vlan_tag_insert(m);
1993 if (__predict_false(m == NULL))
1994 goto drop;
1995 ip_off += ETHER_VLAN_ENCAP_LEN;
1997 #endif
1998 /* (try to) map the frame for DMA */
1999 idx = tx->req & tx->mask;
2000 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
2001 m, tx->seg_list, &cnt,
2002 BUS_DMA_NOWAIT);
2003 if (__predict_false(err == EFBIG)) {
2004 /* Too many segments in the chain. Try
2005 to defrag */
2006 m_tmp = m_defrag(m, M_NOWAIT);
2007 if (m_tmp == NULL) {
2008 goto drop;
2010 ss->tx.defrag++;
2011 m = m_tmp;
2012 err = bus_dmamap_load_mbuf_sg(tx->dmat,
2013 tx->info[idx].map,
2014 m, tx->seg_list, &cnt,
2015 BUS_DMA_NOWAIT);
2017 if (__predict_false(err != 0)) {
2018 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
2019 " packet len = %d\n", err, m->m_pkthdr.len);
2020 goto drop;
2022 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2023 BUS_DMASYNC_PREWRITE);
2024 tx->info[idx].m = m;
2026 #if IFCAP_TSO4
2027 /* TSO is different enough, we handle it in another routine */
2028 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2029 mxge_encap_tso(ss, m, cnt, ip_off);
2030 return;
2032 #endif
2034 req = tx->req_list;
2035 cksum_offset = 0;
2036 pseudo_hdr_offset = 0;
2037 flags = MXGEFW_FLAGS_NO_TSO;
2039 /* checksum offloading? */
2040 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2041 /* ensure ip header is in first mbuf, copy
2042 it to a scratch buffer if not */
2043 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2044 m_copydata(m, 0, ip_off + sizeof (*ip),
2045 ss->scratch);
2046 ip = (struct ip *)(ss->scratch + ip_off);
2047 } else {
2048 ip = (struct ip *)(mtod(m, char *) + ip_off);
2050 cksum_offset = ip_off + (ip->ip_hl << 2);
2051 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2052 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2053 req->cksum_offset = cksum_offset;
2054 flags |= MXGEFW_FLAGS_CKSUM;
2055 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2056 } else {
2057 odd_flag = 0;
2059 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2060 flags |= MXGEFW_FLAGS_SMALL;
2062 /* convert segments into a request list */
2063 cum_len = 0;
2064 seg = tx->seg_list;
2065 req->flags = MXGEFW_FLAGS_FIRST;
2066 for (i = 0; i < cnt; i++) {
2067 req->addr_low =
2068 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2069 req->addr_high =
2070 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2071 req->length = htobe16(seg->ds_len);
2072 req->cksum_offset = cksum_offset;
2073 if (cksum_offset > seg->ds_len)
2074 cksum_offset -= seg->ds_len;
2075 else
2076 cksum_offset = 0;
2077 req->pseudo_hdr_offset = pseudo_hdr_offset;
2078 req->pad = 0; /* complete solid 16-byte block */
2079 req->rdma_count = 1;
2080 req->flags |= flags | ((cum_len & 1) * odd_flag);
2081 cum_len += seg->ds_len;
2082 seg++;
2083 req++;
2084 req->flags = 0;
2086 req--;
2087 /* pad runts to 60 bytes */
2088 if (cum_len < 60) {
2089 req++;
2090 req->addr_low =
2091 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2092 req->addr_high =
2093 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2094 req->length = htobe16(60 - cum_len);
2095 req->cksum_offset = 0;
2096 req->pseudo_hdr_offset = pseudo_hdr_offset;
2097 req->pad = 0; /* complete solid 16-byte block */
2098 req->rdma_count = 1;
2099 req->flags |= flags | ((cum_len & 1) * odd_flag);
2100 cnt++;
2103 tx->req_list[0].rdma_count = cnt;
2104 #if 0
2105 /* print what the firmware will see */
2106 for (i = 0; i < cnt; i++) {
2107 kprintf("%d: addr: 0x%x 0x%x len:%d pso:%d,"
2108 "cso:%d, flags:0x%x, rdma:%d\n",
2109 i, (int)ntohl(tx->req_list[i].addr_high),
2110 (int)ntohl(tx->req_list[i].addr_low),
2111 (int)ntohs(tx->req_list[i].length),
2112 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2113 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2114 tx->req_list[i].rdma_count);
2116 kprintf("--------------\n");
2117 #endif
2118 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2119 mxge_submit_req(tx, tx->req_list, cnt);
2120 #ifdef IFNET_BUF_RING
2121 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2122 /* tell the NIC to start polling this slice */
2123 *tx->send_go = 1;
2124 tx->queue_active = 1;
2125 tx->activate++;
2126 wmb();
2128 #endif
2129 return;
2131 drop:
2132 m_freem(m);
2133 ss->oerrors++;
2134 return;
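/*
 * Illustrative sketch, compiled out: why mxge_encap() pads runts to
 * 60 bytes. An Ethernet frame must be 64 bytes on the wire and the
 * NIC appends the 4-byte FCS, so the host-supplied part must reach
 * 60; the driver does this with an extra descriptor pointing at a
 * shared zeroed DMA block rather than touching the mbuf. The
 * sketch_ helper below is hypothetical.
 */
#if 0
#include <stddef.h>
#include <string.h>

static size_t
sketch_pad_runt(unsigned char *buf, size_t len)
{
	if (len < 60) {
		memset(buf + len, 0, 60 - len);	/* zero padding */
		len = 60;
	}
	return len;
}
#endif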
2137 #ifdef IFNET_BUF_RING
2138 static void
2139 mxge_qflush(struct ifnet *ifp)
2141 mxge_softc_t *sc = ifp->if_softc;
2142 mxge_tx_ring_t *tx;
2143 struct mbuf *m;
2144 int slice;
2146 for (slice = 0; slice < sc->num_slices; slice++) {
2147 tx = &sc->ss[slice].tx;
2148 lockmgr(&tx->lock, LK_EXCLUSIVE);
2149 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2150 m_freem(m);
2151 lockmgr(&tx->lock, LK_RELEASE);
2153 if_qflush(ifp);
2156 static inline void
2157 mxge_start_locked(struct mxge_slice_state *ss)
2159 mxge_softc_t *sc;
2160 struct mbuf *m;
2161 struct ifnet *ifp;
2162 mxge_tx_ring_t *tx;
2164 sc = ss->sc;
2165 ifp = sc->ifp;
2166 tx = &ss->tx;
2168 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2169 m = drbr_dequeue(ifp, tx->br);
2170 if (m == NULL) {
2171 return;
2173 /* let BPF see it */
2174 BPF_MTAP(ifp, m);
2176 /* give it to the nic */
2177 mxge_encap(ss, m);
2179 /* ran out of transmit slots */
2180 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
2181 && (!drbr_empty(ifp, tx->br))) {
2182 ss->if_drv_flags |= IFF_DRV_OACTIVE;
2183 tx->stall++;
2187 static int
2188 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2190 mxge_softc_t *sc;
2191 struct ifnet *ifp;
2192 mxge_tx_ring_t *tx;
2193 int err;
2195 sc = ss->sc;
2196 ifp = sc->ifp;
2197 tx = &ss->tx;
2199 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
2200 IFF_DRV_RUNNING) {
2201 err = drbr_enqueue(ifp, tx->br, m);
2202 return (err);
2205 if (drbr_empty(ifp, tx->br) &&
2206 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2207 /* let BPF see it */
2208 BPF_MTAP(ifp, m);
2209 /* give it to the nic */
2210 mxge_encap(ss, m);
2211 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2212 return (err);
2214 if (!drbr_empty(ifp, tx->br))
2215 mxge_start_locked(ss);
2216 return (0);
2219 static int
2220 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2222 mxge_softc_t *sc = ifp->if_softc;
2223 struct mxge_slice_state *ss;
2224 mxge_tx_ring_t *tx;
2225 int err = 0;
2226 int slice;
2228 slice = m->m_pkthdr.flowid;
2229 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2231 ss = &sc->ss[slice];
2232 tx = &ss->tx;
2234 if (lockmgr(&tx->lock, LK_EXCLUSIVE|LK_NOWAIT) == 0) {
2235 err = mxge_transmit_locked(ss, m);
2236 lockmgr(&tx->lock, LK_RELEASE);
2237 } else {
2238 err = drbr_enqueue(ifp, tx->br, m);
2241 return (err);
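/*
 * Illustrative sketch, compiled out: the flowid-to-slice mapping used
 * by mxge_transmit() above. Because num_slices is kept a power of
 * two, masking with (n - 1) is an exact, branch-free modulo. The
 * sketch_ helper is hypothetical.
 */
#if 0
#include <stdint.h>

static unsigned
sketch_pick_slice(uint32_t flowid, unsigned num_slices)
{
	return flowid & (num_slices - 1);	/* == flowid % num_slices */
}
#endif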
2244 #else
2246 static inline void
2247 mxge_start_locked(struct mxge_slice_state *ss)
2249 mxge_softc_t *sc;
2250 struct mbuf *m;
2251 struct ifnet *ifp;
2252 mxge_tx_ring_t *tx;
2254 sc = ss->sc;
2255 ifp = sc->ifp;
2256 tx = &ss->tx;
2257 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2258 IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
2259 if (m == NULL) {
2260 return;
2262 /* let BPF see it */
2263 BPF_MTAP(ifp, m);
2265 /* give it to the nic */
2266 mxge_encap(ss, m);
2268 /* ran out of transmit slots */
2269 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
2270 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2271 tx->stall++;
2274 #endif
2275 static void
2276 mxge_start(struct ifnet *ifp)
2278 mxge_softc_t *sc = ifp->if_softc;
2279 struct mxge_slice_state *ss;
2281 /* only use the first slice for now */
2282 ss = &sc->ss[0];
2283 lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
2284 mxge_start_locked(ss);
2285 lockmgr(&ss->tx.lock, LK_RELEASE);
2289 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2290 * at most 32 bytes at a time, so as to avoid involving the software
2291 * pio handler in the nic. We re-write the first segment's low
2292 * DMA address to mark it valid only after we write the entire chunk
2293 * in a burst
2295 static inline void
2296 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2297 mcp_kreq_ether_recv_t *src)
2299 uint32_t low;
2301 low = src->addr_low;
2302 src->addr_low = 0xffffffff;
2303 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2304 wmb();
2305 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2306 wmb();
2307 src->addr_low = low;
2308 dst->addr_low = low;
2309 wmb();
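/*
 * Illustrative sketch, compiled out: the publish ordering used by
 * mxge_submit_8rx() above. The first descriptor's address word is
 * poisoned, the whole group is copied, and only then is the real
 * address written, so the NIC can never see a partially written
 * group. Generic producer-side pattern; sketch_wmb is a stand-in for
 * wmb() and the burst copy is shown as a plain loop.
 */
#if 0
#include <stdint.h>

struct sketch_desc { uint32_t addr_low; uint32_t rest[3]; };

static void
sketch_publish(volatile struct sketch_desc *dst, struct sketch_desc *src,
	       int n, void (*sketch_wmb)(void))
{
	uint32_t low = src[0].addr_low;
	int i;

	src[0].addr_low = 0xffffffff;	/* poison the word the NIC keys on */
	for (i = 0; i < n; i++) {
		dst[i].addr_low = src[i].addr_low;
		dst[i].rest[0] = src[i].rest[0];
		dst[i].rest[1] = src[i].rest[1];
		dst[i].rest[2] = src[i].rest[2];
	}
	sketch_wmb();			/* whole group visible first ... */
	dst[0].addr_low = low;		/* ... then the group becomes valid */
	sketch_wmb();
	src[0].addr_low = low;		/* restore the host copy */
}
#endif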
2312 static int
2313 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2315 bus_dma_segment_t seg;
2316 struct mbuf *m;
2317 mxge_rx_ring_t *rx = &ss->rx_small;
2318 int cnt, err;
2320 m = m_gethdr(MB_DONTWAIT, MT_DATA);
2321 if (m == NULL) {
2322 rx->alloc_fail++;
2323 err = ENOBUFS;
2324 goto done;
2326 m->m_len = MHLEN;
2327 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2328 &seg, &cnt, BUS_DMA_NOWAIT);
2329 if (err != 0) {
2330 m_free(m);
2331 goto done;
2333 rx->info[idx].m = m;
2334 rx->shadow[idx].addr_low =
2335 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2336 rx->shadow[idx].addr_high =
2337 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2339 done:
2340 if ((idx & 7) == 7)
2341 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2342 return err;
2345 static int
2346 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2348 bus_dma_segment_t seg[3];
2349 struct mbuf *m;
2350 mxge_rx_ring_t *rx = &ss->rx_big;
2351 int cnt, err, i;
2353 if (rx->cl_size == MCLBYTES)
2354 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2355 else
2356 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2357 if (m == NULL) {
2358 rx->alloc_fail++;
2359 err = ENOBUFS;
2360 goto done;
2362 m->m_len = rx->mlen;
2363 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
2364 seg, &cnt, BUS_DMA_NOWAIT);
2365 if (err != 0) {
2366 m_free(m);
2367 goto done;
2369 rx->info[idx].m = m;
2370 rx->shadow[idx].addr_low =
2371 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2372 rx->shadow[idx].addr_high =
2373 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2375 #if MXGE_VIRT_JUMBOS
2376 for (i = 1; i < cnt; i++) {
2377 rx->shadow[idx + i].addr_low =
2378 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2379 rx->shadow[idx + i].addr_high =
2380 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2382 #endif
2384 done:
2385 for (i = 0; i < rx->nbufs; i++) {
2386 if ((idx & 7) == 7) {
2387 mxge_submit_8rx(&rx->lanai[idx - 7],
2388 &rx->shadow[idx - 7]);
2390 idx++;
2392 return err;
2396 * Myri10GE hardware checksums are not valid if the sender
2397 * padded the frame with non-zero padding. This is because
2398 * the firmware just does a simple 16-bit 1s complement
2399 * checksum across the entire frame, excluding the first 14
2400 bytes. It is best to simply check the checksum and
2401 * tell the stack about it only if the checksum is good
2404 static inline uint16_t
2405 mxge_rx_csum(struct mbuf *m, int csum)
2407 struct ether_header *eh;
2408 struct ip *ip;
2409 uint16_t c;
2411 eh = mtod(m, struct ether_header *);
2413 /* only deal with IPv4 TCP & UDP for now */
2414 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2415 return 1;
2416 ip = (struct ip *)(eh + 1);
2417 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2418 ip->ip_p != IPPROTO_UDP))
2419 return 1;
2420 #ifdef INET
2421 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2422 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2423 (ip->ip_hl << 2) + ip->ip_p));
2424 #else
2425 c = 1;
2426 #endif
2427 c ^= 0xffff;
2428 return (c);
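/*
 * Illustrative sketch, compiled out: the verification mxge_rx_csum()
 * performs via in_pseudo(). Assuming the IP header checksum is itself
 * valid (so the header's words fold to ~0 and drop out of the sum),
 * the firmware's whole-frame checksum plus the TCP/UDP pseudo-header
 * must fold to 0xffff for a good packet. The sketch_ helpers are
 * hypothetical and use plain 1's complement folding.
 */
#if 0
#include <stdint.h>

static uint16_t
sketch_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16); /* end-around carry */
	return (uint16_t)sum;
}

/* returns 0 when the hardware checksum verifies */
static uint16_t
sketch_verify(uint16_t hw_csum, uint32_t saddr, uint32_t daddr,
	      uint16_t l4len, uint8_t proto)
{
	uint32_t sum = hw_csum;

	sum += (saddr >> 16) + (saddr & 0xffff);	/* pseudo header */
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += l4len + proto;
	return sketch_fold(sum) ^ 0xffff;
}
#endif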
2431 static void
2432 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2434 struct ether_vlan_header *evl;
2435 struct ether_header *eh;
2436 uint32_t partial;
2438 evl = mtod(m, struct ether_vlan_header *);
2439 eh = mtod(m, struct ether_header *);
2442 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2443 * after what the firmware thought was the end of the ethernet
2444 * header.
2447 /* put checksum into host byte order */
2448 *csum = ntohs(*csum);
2449 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2450 (*csum) += ~partial;
2451 (*csum) += ((*csum) < ~partial);
2452 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2453 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2455 /* restore checksum to network byte order;
2456 later consumers expect this */
2457 *csum = htons(*csum);
2459 /* save the tag */
2460 #ifdef MXGE_NEW_VLAN_API
2461 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
2462 #else
2464 struct m_tag *mtag;
2465 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2466 M_NOWAIT);
2467 if (mtag == NULL)
2468 return;
2469 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2470 m_tag_prepend(m, mtag);
2473 #endif
2474 m->m_flags |= M_VLANTAG;
2477 * Remove the 802.1q header by copying the Ethernet
2478 * addresses over it and adjusting the beginning of
2479 * the data in the mbuf. The encapsulated Ethernet
2480 * type field is already in place.
2482 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
2483 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2484 m_adj(m, ETHER_VLAN_ENCAP_LEN);
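/*
 * Illustrative sketch, compiled out: the 1's complement arithmetic
 * used above to subtract the 4 VLAN bytes from the firmware's
 * checksum: add the complement of the removed 32-bit chunk, propagate
 * the end-around carry, then fold 32 -> 16 bits twice. The sketch_
 * helper is hypothetical.
 */
#if 0
#include <stdint.h>

static uint16_t
sketch_csum_sub32(uint16_t csum, uint32_t chunk)
{
	uint32_t sum = csum;

	sum += ~chunk;			/* 1's complement subtraction */
	sum += (sum < ~chunk);		/* end-around carry */
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return (uint16_t)sum;
}
#endif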
2488 static inline void
2489 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2491 mxge_softc_t *sc;
2492 struct ifnet *ifp;
2493 struct mbuf *m;
2494 struct ether_header *eh;
2495 mxge_rx_ring_t *rx;
2496 bus_dmamap_t old_map;
2497 int idx;
2498 uint16_t tcpudp_csum;
2500 sc = ss->sc;
2501 ifp = sc->ifp;
2502 rx = &ss->rx_big;
2503 idx = rx->cnt & rx->mask;
2504 rx->cnt += rx->nbufs;
2505 /* save a pointer to the received mbuf */
2506 m = rx->info[idx].m;
2507 /* try to replace the received mbuf */
2508 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2509 /* drop the frame -- the old mbuf is re-cycled */
2510 ifp->if_ierrors++;
2511 return;
2514 /* unmap the received buffer */
2515 old_map = rx->info[idx].map;
2516 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2517 bus_dmamap_unload(rx->dmat, old_map);
2519 /* swap the bus_dmamap_t's */
2520 rx->info[idx].map = rx->extra_map;
2521 rx->extra_map = old_map;
2523 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2524 * aligned */
2525 m->m_data += MXGEFW_PAD;
2527 m->m_pkthdr.rcvif = ifp;
2528 m->m_len = m->m_pkthdr.len = len;
2529 ss->ipackets++;
2530 eh = mtod(m, struct ether_header *);
2531 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2532 mxge_vlan_tag_remove(m, &csum);
2534 /* if the checksum is valid, mark it in the mbuf header */
2535 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2536 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2537 return;
2538 /* otherwise, it was a UDP frame, or a TCP frame which
2539 we could not do LRO on. Tell the stack that the
2540 checksum is good */
2541 m->m_pkthdr.csum_data = 0xffff;
2542 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2544 /* flowid only valid if RSS hashing is enabled */
2545 if (sc->num_slices > 1) {
2546 m->m_pkthdr.flowid = (ss - sc->ss);
2547 m->m_flags |= M_FLOWID;
2549 /* pass the frame up the stack */
2550 (*ifp->if_input)(ifp, m);
2553 static inline void
2554 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2556 mxge_softc_t *sc;
2557 struct ifnet *ifp;
2558 struct ether_header *eh;
2559 struct mbuf *m;
2560 mxge_rx_ring_t *rx;
2561 bus_dmamap_t old_map;
2562 int idx;
2563 uint16_t tcpudp_csum;
2565 sc = ss->sc;
2566 ifp = sc->ifp;
2567 rx = &ss->rx_small;
2568 idx = rx->cnt & rx->mask;
2569 rx->cnt++;
2570 /* save a pointer to the received mbuf */
2571 m = rx->info[idx].m;
2572 /* try to replace the received mbuf */
2573 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2574 /* drop the frame -- the old mbuf is re-cycled */
2575 ifp->if_ierrors++;
2576 return;
2579 /* unmap the received buffer */
2580 old_map = rx->info[idx].map;
2581 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2582 bus_dmamap_unload(rx->dmat, old_map);
2584 /* swap the bus_dmamap_t's */
2585 rx->info[idx].map = rx->extra_map;
2586 rx->extra_map = old_map;
2588 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2589 * aligned */
2590 m->m_data += MXGEFW_PAD;
2592 m->m_pkthdr.rcvif = ifp;
2593 m->m_len = m->m_pkthdr.len = len;
2594 ss->ipackets++;
2595 eh = mtod(m, struct ether_header *);
2596 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2597 mxge_vlan_tag_remove(m, &csum);
2599 /* if the checksum is valid, mark it in the mbuf header */
2600 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2601 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2602 return;
2603 /* otherwise, it was a UDP frame, or a TCP frame which
2604 we could not do LRO on. Tell the stack that the
2605 checksum is good */
2606 m->m_pkthdr.csum_data = 0xffff;
2607 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2609 /* flowid only valid if RSS hashing is enabled */
2610 if (sc->num_slices > 1) {
2611 m->m_pkthdr.flowid = (ss - sc->ss);
2612 m->m_flags |= M_FLOWID;
2614 /* pass the frame up the stack */
2615 (*ifp->if_input)(ifp, m);
2618 static inline void
2619 mxge_clean_rx_done(struct mxge_slice_state *ss)
2621 mxge_rx_done_t *rx_done = &ss->rx_done;
2622 int limit = 0;
2623 uint16_t length;
2624 uint16_t checksum;
2627 while (rx_done->entry[rx_done->idx].length != 0) {
2628 length = ntohs(rx_done->entry[rx_done->idx].length);
2629 rx_done->entry[rx_done->idx].length = 0;
2630 checksum = rx_done->entry[rx_done->idx].checksum;
2631 if (length <= (MHLEN - MXGEFW_PAD))
2632 mxge_rx_done_small(ss, length, checksum);
2633 else
2634 mxge_rx_done_big(ss, length, checksum);
2635 rx_done->cnt++;
2636 rx_done->idx = rx_done->cnt & rx_done->mask;
2638 /* limit potential for livelock */
2639 if (__predict_false(++limit > rx_done->mask / 2))
2640 break;
2642 #ifdef INET
2643 while (!SLIST_EMPTY(&ss->lro_active)) {
2644 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2645 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2646 mxge_lro_flush(ss, lro);
2648 #endif
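/*
 * Illustrative sketch, compiled out: the bounded-drain pattern used
 * by mxge_clean_rx_done() above. A completion loop running at
 * interrupt level must cap the work done per invocation (here half
 * the ring) or a fast producer can livelock the CPU. Hypothetical
 * sketch_ helper with caller-supplied callbacks.
 */
#if 0
static int
sketch_drain(unsigned *idx, unsigned mask,
	     int (*pending)(unsigned), void (*handle)(unsigned))
{
	int budget = mask / 2, done = 0;

	while (pending(*idx) && done < budget) {
		handle(*idx);
		*idx = (*idx + 1) & mask;
		done++;
	}
	return done;	/* caller can reschedule if the budget was hit */
}
#endif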
2652 static inline void
2653 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2655 struct ifnet *ifp;
2656 mxge_tx_ring_t *tx;
2657 struct mbuf *m;
2658 bus_dmamap_t map;
2659 int idx;
2660 int *flags;
2662 tx = &ss->tx;
2663 ifp = ss->sc->ifp;
2664 while (tx->pkt_done != mcp_idx) {
2665 idx = tx->done & tx->mask;
2666 tx->done++;
2667 m = tx->info[idx].m;
2668 /* mbuf and DMA map only attached to the first
2669 segment per-mbuf */
2670 if (m != NULL) {
2671 ss->obytes += m->m_pkthdr.len;
2672 if (m->m_flags & M_MCAST)
2673 ss->omcasts++;
2674 ss->opackets++;
2675 tx->info[idx].m = NULL;
2676 map = tx->info[idx].map;
2677 bus_dmamap_unload(tx->dmat, map);
2678 m_freem(m);
2680 if (tx->info[idx].flag) {
2681 tx->info[idx].flag = 0;
2682 tx->pkt_done++;
2686 /* If we have space, clear IFF_OACTIVE to tell the stack that
2687 it's OK to send packets */
2688 #ifdef IFNET_BUF_RING
2689 flags = &ss->if_drv_flags;
2690 #else
2691 flags = &ifp->if_drv_flags;
2692 #endif
2693 lockmgr(&ss->tx.lock, LK_EXCLUSIVE);
2694 if ((*flags) & IFF_DRV_OACTIVE &&
2695 tx->req - tx->done < (tx->mask + 1)/4) {
2696 *(flags) &= ~IFF_DRV_OACTIVE;
2697 ss->tx.wake++;
2698 mxge_start_locked(ss);
2700 #ifdef IFNET_BUF_RING
2701 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2702 /* let the NIC stop polling this queue, since there
2703 * are no more transmits pending */
2704 if (tx->req == tx->done) {
2705 *tx->send_stop = 1;
2706 tx->queue_active = 0;
2707 tx->deactivate++;
2708 wmb();
2711 #endif
2712 lockmgr(&ss->tx.lock, LK_RELEASE);
2716 static struct mxge_media_type mxge_xfp_media_types[] =
2718 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2719 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2720 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2721 {0, (1 << 5), "10GBASE-ER"},
2722 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2723 {0, (1 << 3), "10GBASE-SW"},
2724 {0, (1 << 2), "10GBASE-LW"},
2725 {0, (1 << 1), "10GBASE-EW"},
2726 {0, (1 << 0), "Reserved"}
2728 static struct mxge_media_type mxge_sfp_media_types[] =
2730 {0, (1 << 7), "Reserved"},
2731 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2732 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2733 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2736 static void
2737 mxge_set_media(mxge_softc_t *sc, int type)
2739 sc->media_flags |= type;
2740 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2741 ifmedia_set(&sc->media, sc->media_flags);
2746 * Determine the media type for a NIC. Some XFPs will identify
2747 * themselves only when their link is up, so this is initiated via a
2748 * link up interrupt. However, this can potentially take up to
2749 * several milliseconds, so it is run via the watchdog routine, rather
2750 * than in the interrupt handler itself. This need only be done
2751 * once, not each time the link is up.
2753 static void
2754 mxge_media_probe(mxge_softc_t *sc)
2756 mxge_cmd_t cmd;
2757 char *cage_type;
2758 char *ptr;
2759 struct mxge_media_type *mxge_media_types = NULL;
2760 int i, err, ms, mxge_media_type_entries;
2761 uint32_t byte;
2763 sc->need_media_probe = 0;
2765 /* if we've already set a media type, we're done */
2766 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2767 return;
2770 * parse the product code to determine the interface type
2771 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2772 * after the 3rd dash in the driver's cached copy of the
2773 * EEPROM's product code string.
2775 ptr = sc->product_code_string;
2776 if (ptr == NULL) {
2777 device_printf(sc->dev, "Missing product code\n");
return;	/* cannot parse a missing product code */
2780 for (i = 0; i < 3; i++, ptr++) {
2781 ptr = index(ptr, '-');
2782 if (ptr == NULL) {
2783 device_printf(sc->dev,
2784 "only %d dashes in PC?!?\n", i);
2785 return;
2788 if (*ptr == 'C') {
2789 /* -C is CX4 */
2790 mxge_set_media(sc, IFM_10G_CX4);
2791 return;
2793 else if (*ptr == 'Q') {
2794 /* -Q is Quad Ribbon Fiber */
2795 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2796 /* FreeBSD has no media type for Quad ribbon fiber */
2797 return;
2800 if (*ptr == 'R') {
2801 /* -R is XFP */
2802 mxge_media_types = mxge_xfp_media_types;
2803 mxge_media_type_entries =
2804 sizeof (mxge_xfp_media_types) /
2805 sizeof (mxge_xfp_media_types[0]);
2806 byte = MXGE_XFP_COMPLIANCE_BYTE;
2807 cage_type = "XFP";
2810 if (*ptr == 'S' || *(ptr +1) == 'S') {
2811 /* -S or -2S is SFP+ */
2812 mxge_media_types = mxge_sfp_media_types;
2813 mxge_media_type_entries =
2814 sizeof (mxge_sfp_media_types) /
2815 sizeof (mxge_sfp_media_types[0]);
2816 cage_type = "SFP+";
2817 byte = 3;
2820 if (mxge_media_types == NULL) {
2821 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2822 return;
2826 * At this point we know the NIC has an XFP cage, so now we
2827 * try to determine what is in the cage by using the
2828 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2829 * register. We read just one byte, which may take over
2830 * a millisecond
2833 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2834 cmd.data1 = byte;
2835 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2836 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2837 device_printf(sc->dev, "failed to read XFP\n");
2839 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2840 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2842 if (err != MXGEFW_CMD_OK) {
2843 return;
2846 /* now we wait for the data to be cached */
2847 cmd.data0 = byte;
2848 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2849 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2850 DELAY(1000);
2851 cmd.data0 = byte;
2852 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2854 if (err != MXGEFW_CMD_OK) {
2855 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2856 cage_type, err, ms);
2857 return;
2860 if (cmd.data0 == mxge_media_types[0].bitmask) {
2861 if (mxge_verbose)
2862 device_printf(sc->dev, "%s:%s\n", cage_type,
2863 mxge_media_types[0].name);
2864 mxge_set_media(sc, IFM_10G_CX4);
2865 return;
2867 for (i = 1; i < mxge_media_type_entries; i++) {
2868 if (cmd.data0 & mxge_media_types[i].bitmask) {
2869 if (mxge_verbose)
2870 device_printf(sc->dev, "%s:%s\n",
2871 cage_type,
2872 mxge_media_types[i].name);
2874 mxge_set_media(sc, mxge_media_types[i].flag);
2875 return;
2878 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2879 cmd.data0);
2881 return;
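/*
 * Illustrative sketch, compiled out: locating the cage-type character
 * the probe above keys off, i.e. the character following the third
 * '-' in the product code. Hypothetical code using strchr() in place
 * of the older index(); the example product code string is made up.
 */
#if 0
#include <string.h>

static char
sketch_cage_char(const char *product_code)	/* e.g. "10G-PCIE-8A-R" */
{
	const char *p = product_code;
	int i;

	for (i = 0; i < 3; i++, p++) {
		p = strchr(p, '-');
		if (p == NULL)
			return '\0';	/* malformed product code */
	}
	return *p;	/* 'C' = CX4, 'R' = XFP, 'S' = SFP+, 'Q' = quad */
}
#endif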
2884 static void
2885 mxge_intr(void *arg)
2887 struct mxge_slice_state *ss = arg;
2888 mxge_softc_t *sc = ss->sc;
2889 mcp_irq_data_t *stats = ss->fw_stats;
2890 mxge_tx_ring_t *tx = &ss->tx;
2891 mxge_rx_done_t *rx_done = &ss->rx_done;
2892 uint32_t send_done_count;
2893 uint8_t valid;
2896 #ifndef IFNET_BUF_RING
2897 /* an interrupt on a non-zero slice is implicitly valid
2898 since MSI-X irqs are not shared */
2899 if (ss != sc->ss) {
2900 mxge_clean_rx_done(ss);
2901 *ss->irq_claim = be32toh(3);
2902 return;
2904 #endif
2906 /* make sure the DMA has finished */
2907 if (!stats->valid) {
2908 return;
2910 valid = stats->valid;
2912 if (sc->legacy_irq) {
2913 /* lower legacy IRQ */
2914 *sc->irq_deassert = 0;
2915 if (!mxge_deassert_wait)
2916 /* don't wait for conf. that irq is low */
2917 stats->valid = 0;
2918 } else {
2919 stats->valid = 0;
2922 /* loop while waiting for legacy irq deassertion */
2923 do {
2924 /* check for transmit completes and receives */
2925 send_done_count = be32toh(stats->send_done_count);
2926 while ((send_done_count != tx->pkt_done) ||
2927 (rx_done->entry[rx_done->idx].length != 0)) {
2928 if (send_done_count != tx->pkt_done)
2929 mxge_tx_done(ss, (int)send_done_count);
2930 mxge_clean_rx_done(ss);
2931 send_done_count = be32toh(stats->send_done_count);
2933 if (sc->legacy_irq && mxge_deassert_wait)
2934 wmb();
2935 } while (*((volatile uint8_t *) &stats->valid));
2937 /* fw link & error stats meaningful only on the first slice */
2938 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2939 if (sc->link_state != stats->link_up) {
2940 sc->link_state = stats->link_up;
2941 if (sc->link_state) {
2942 sc->ifp->if_link_state = LINK_STATE_UP;
2943 if_link_state_change(sc->ifp);
2944 if (mxge_verbose)
2945 device_printf(sc->dev, "link up\n");
2946 } else {
2947 sc->ifp->if_link_state = LINK_STATE_DOWN;
2948 if_link_state_change(sc->ifp);
2949 if (mxge_verbose)
2950 device_printf(sc->dev, "link down\n");
2952 sc->need_media_probe = 1;
2954 if (sc->rdma_tags_available !=
2955 be32toh(stats->rdma_tags_available)) {
2956 sc->rdma_tags_available =
2957 be32toh(stats->rdma_tags_available);
2958 device_printf(sc->dev, "RDMA timed out! %d tags "
2959 "left\n", sc->rdma_tags_available);
2962 if (stats->link_down) {
2963 sc->down_cnt += stats->link_down;
2964 sc->link_state = 0;
2965 sc->ifp->if_link_state = LINK_STATE_DOWN;
2966 if_link_state_change(sc->ifp);
2970 /* check to see if we have an rx token to pass back */
2971 if (valid & 0x1)
2972 *ss->irq_claim = be32toh(3);
2973 *(ss->irq_claim + 1) = be32toh(3);
2976 static void
2977 mxge_init(void *arg)
2983 static void
2984 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
2986 struct lro_entry *lro_entry;
2987 int i;
2989 while (!SLIST_EMPTY(&ss->lro_free)) {
2990 lro_entry = SLIST_FIRST(&ss->lro_free);
2991 SLIST_REMOVE_HEAD(&ss->lro_free, next);
2992 kfree(lro_entry, M_DEVBUF);
2995 for (i = 0; i <= ss->rx_big.mask; i++) {
2996 if (ss->rx_big.info[i].m == NULL)
2997 continue;
2998 bus_dmamap_unload(ss->rx_big.dmat,
2999 ss->rx_big.info[i].map);
3000 m_freem(ss->rx_big.info[i].m);
3001 ss->rx_big.info[i].m = NULL;
3004 for (i = 0; i <= ss->rx_small.mask; i++) {
3005 if (ss->rx_small.info[i].m == NULL)
3006 continue;
3007 bus_dmamap_unload(ss->rx_small.dmat,
3008 ss->rx_small.info[i].map);
3009 m_freem(ss->rx_small.info[i].m);
3010 ss->rx_small.info[i].m = NULL;
3013 /* transmit ring used only on the first slice */
3014 if (ss->tx.info == NULL)
3015 return;
3017 for (i = 0; i <= ss->tx.mask; i++) {
3018 ss->tx.info[i].flag = 0;
3019 if (ss->tx.info[i].m == NULL)
3020 continue;
3021 bus_dmamap_unload(ss->tx.dmat,
3022 ss->tx.info[i].map);
3023 m_freem(ss->tx.info[i].m);
3024 ss->tx.info[i].m = NULL;
3028 static void
3029 mxge_free_mbufs(mxge_softc_t *sc)
3031 int slice;
3033 for (slice = 0; slice < sc->num_slices; slice++)
3034 mxge_free_slice_mbufs(&sc->ss[slice]);
3037 static void
3038 mxge_free_slice_rings(struct mxge_slice_state *ss)
3040 int i;
3043 if (ss->rx_done.entry != NULL)
3044 mxge_dma_free(&ss->rx_done.dma);
3045 ss->rx_done.entry = NULL;
3047 if (ss->tx.req_bytes != NULL)
3048 kfree(ss->tx.req_bytes, M_DEVBUF);
3049 ss->tx.req_bytes = NULL;
3051 if (ss->tx.seg_list != NULL)
3052 kfree(ss->tx.seg_list, M_DEVBUF);
3053 ss->tx.seg_list = NULL;
3055 if (ss->rx_small.shadow != NULL)
3056 kfree(ss->rx_small.shadow, M_DEVBUF);
3057 ss->rx_small.shadow = NULL;
3059 if (ss->rx_big.shadow != NULL)
3060 kfree(ss->rx_big.shadow, M_DEVBUF);
3061 ss->rx_big.shadow = NULL;
3063 if (ss->tx.info != NULL) {
3064 if (ss->tx.dmat != NULL) {
3065 for (i = 0; i <= ss->tx.mask; i++) {
3066 bus_dmamap_destroy(ss->tx.dmat,
3067 ss->tx.info[i].map);
3069 bus_dma_tag_destroy(ss->tx.dmat);
3071 kfree(ss->tx.info, M_DEVBUF);
3073 ss->tx.info = NULL;
3075 if (ss->rx_small.info != NULL) {
3076 if (ss->rx_small.dmat != NULL) {
3077 for (i = 0; i <= ss->rx_small.mask; i++) {
3078 bus_dmamap_destroy(ss->rx_small.dmat,
3079 ss->rx_small.info[i].map);
3081 bus_dmamap_destroy(ss->rx_small.dmat,
3082 ss->rx_small.extra_map);
3083 bus_dma_tag_destroy(ss->rx_small.dmat);
3085 kfree(ss->rx_small.info, M_DEVBUF);
3087 ss->rx_small.info = NULL;
3089 if (ss->rx_big.info != NULL) {
3090 if (ss->rx_big.dmat != NULL) {
3091 for (i = 0; i <= ss->rx_big.mask; i++) {
3092 bus_dmamap_destroy(ss->rx_big.dmat,
3093 ss->rx_big.info[i].map);
3095 bus_dmamap_destroy(ss->rx_big.dmat,
3096 ss->rx_big.extra_map);
3097 bus_dma_tag_destroy(ss->rx_big.dmat);
3099 kfree(ss->rx_big.info, M_DEVBUF);
3101 ss->rx_big.info = NULL;
3104 static void
3105 mxge_free_rings(mxge_softc_t *sc)
3107 int slice;
3109 for (slice = 0; slice < sc->num_slices; slice++)
3110 mxge_free_slice_rings(&sc->ss[slice]);
3113 static int
3114 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3115 int tx_ring_entries)
3117 mxge_softc_t *sc = ss->sc;
3118 size_t bytes;
3119 int err, i;
3121 err = ENOMEM;
3123 /* allocate per-slice receive resources */
3125 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3126 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3128 /* allocate the rx shadow rings */
3129 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3130 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3131 if (ss->rx_small.shadow == NULL)
3132 return err;
3134 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3135 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3136 if (ss->rx_big.shadow == NULL)
3137 return err;
3139 /* allocate the rx host info rings */
3140 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3141 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3142 if (ss->rx_small.info == NULL)
3143 return err;
3145 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3146 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3147 if (ss->rx_big.info == NULL)
3148 return err;
3150 /* allocate the rx busdma resources */
3151 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3152 1, /* alignment */
3153 4096, /* boundary */
3154 BUS_SPACE_MAXADDR, /* low */
3155 BUS_SPACE_MAXADDR, /* high */
3156 NULL, NULL, /* filter */
3157 MHLEN, /* maxsize */
3158 1, /* num segs */
3159 MHLEN, /* maxsegsize */
3160 BUS_DMA_ALLOCNOW, /* flags */
3161 NULL, NULL, /* lock */
3162 &ss->rx_small.dmat); /* tag */
3163 if (err != 0) {
3164 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3165 err);
3166 return err;
3169 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3170 1, /* alignment */
3171 #if MXGE_VIRT_JUMBOS
3172 4096, /* boundary */
3173 #else
3174 0, /* boundary */
3175 #endif
3176 BUS_SPACE_MAXADDR, /* low */
3177 BUS_SPACE_MAXADDR, /* high */
3178 NULL, NULL, /* filter */
3179 3*4096, /* maxsize */
3180 #if MXGE_VIRT_JUMBOS
3181 3, /* num segs */
3182 4096, /* maxsegsize*/
3183 #else
3184 1, /* num segs */
3185 MJUM9BYTES, /* maxsegsize*/
3186 #endif
3187 BUS_DMA_ALLOCNOW, /* flags */
3188 NULL, NULL, /* lock */
3189 &ss->rx_big.dmat); /* tag */
3190 if (err != 0) {
3191 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3192 err);
3193 return err;
3195 for (i = 0; i <= ss->rx_small.mask; i++) {
3196 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3197 &ss->rx_small.info[i].map);
3198 if (err != 0) {
3199 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3200 err);
3201 return err;
3204 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3205 &ss->rx_small.extra_map);
3206 if (err != 0) {
3207 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3208 err);
3209 return err;
3212 for (i = 0; i <= ss->rx_big.mask; i++) {
3213 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3214 &ss->rx_big.info[i].map);
3215 if (err != 0) {
3216 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3217 err);
3218 return err;
3221 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3222 &ss->rx_big.extra_map);
3223 if (err != 0) {
3224 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3225 err);
3226 return err;
3229 /* now allocate TX resources */
3231 #ifndef IFNET_BUF_RING
3232 /* only use a single TX ring for now */
3233 if (ss != ss->sc->ss)
3234 return 0;
3235 #endif
3237 ss->tx.mask = tx_ring_entries - 1;
3238 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3241 /* allocate the tx request copy block */
3242 bytes = 8 +
3243 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3244 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3245 if (ss->tx.req_bytes == NULL)
3246 return err;
3247 /* ensure req_list entries are aligned to 8 bytes */
3248 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3249 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3251 /* allocate the tx busdma segment list */
3252 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3253 ss->tx.seg_list = (bus_dma_segment_t *)
3254 kmalloc(bytes, M_DEVBUF, M_WAITOK);
3255 if (ss->tx.seg_list == NULL)
3256 return err;
3258 /* allocate the tx host info ring */
3259 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3260 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3261 if (ss->tx.info == NULL)
3262 return err;
3264 /* allocate the tx busdma resources */
3265 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3266 1, /* alignment */
3267 sc->tx_boundary, /* boundary */
3268 BUS_SPACE_MAXADDR, /* low */
3269 BUS_SPACE_MAXADDR, /* high */
3270 NULL, NULL, /* filter */
3271 65536 + 256, /* maxsize */
3272 ss->tx.max_desc - 2, /* num segs */
3273 sc->tx_boundary, /* maxsegsz */
3274 BUS_DMA_ALLOCNOW, /* flags */
3275 NULL, NULL, /* lock */
3276 &ss->tx.dmat); /* tag */
3278 if (err != 0) {
3279 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3280 err);
3281 return err;
3284 /* now use these tags to setup dmamaps for each slot
3285 in the ring */
3286 for (i = 0; i <= ss->tx.mask; i++) {
3287 err = bus_dmamap_create(ss->tx.dmat, 0,
3288 &ss->tx.info[i].map);
3289 if (err != 0) {
3290 device_printf(sc->dev, "Err %d tx dmamap\n",
3291 err);
3292 return err;
3295 return 0;
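/*
 * Illustrative sketch, compiled out: the alignment trick used for
 * tx.req_list above; over-allocate by the alignment, then round the
 * pointer up, keeping the raw pointer for the eventual free(). The
 * sketch_ helper is hypothetical and uses plain malloc().
 */
#if 0
#include <stdlib.h>

static void *
sketch_alloc_aligned8(size_t bytes, void **raw)
{
	*raw = malloc(bytes + 8);
	if (*raw == NULL)
		return NULL;
	return (void *)(((unsigned long)*raw + 7) & ~7UL);
}
#endif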
3299 static int
3300 mxge_alloc_rings(mxge_softc_t *sc)
3302 mxge_cmd_t cmd;
3303 int tx_ring_size;
3304 int tx_ring_entries, rx_ring_entries;
3305 int err, slice;
3307 /* get ring sizes */
3308 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3309 tx_ring_size = cmd.data0;
3310 if (err != 0) {
3311 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3312 goto abort;
3315 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3316 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3317 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
3318 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
3319 IFQ_SET_READY(&sc->ifp->if_snd);
3321 for (slice = 0; slice < sc->num_slices; slice++) {
3322 err = mxge_alloc_slice_rings(&sc->ss[slice],
3323 rx_ring_entries,
3324 tx_ring_entries);
3325 if (err != 0)
3326 goto abort;
3328 return 0;
3330 abort:
3331 mxge_free_rings(sc);
3332 return err;
3337 static void
3338 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3340 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3342 if (bufsize < MCLBYTES) {
3343 /* easy, everything fits in a single buffer */
3344 *big_buf_size = MCLBYTES;
3345 *cl_size = MCLBYTES;
3346 *nbufs = 1;
3347 return;
3350 if (bufsize < MJUMPAGESIZE) {
3351 /* still easy, everything still fits in a single buffer */
3352 *big_buf_size = MJUMPAGESIZE;
3353 *cl_size = MJUMPAGESIZE;
3354 *nbufs = 1;
3355 return;
3357 #if MXGE_VIRT_JUMBOS
3358 /* now we need to use virtually contiguous buffers */
3359 *cl_size = MJUM9BYTES;
3360 *big_buf_size = 4096;
3361 *nbufs = mtu / 4096 + 1;
3362 /* needs to be a power of two, so round up */
3363 if (*nbufs == 3)
3364 *nbufs = 4;
3365 #else
3366 *cl_size = MJUM9BYTES;
3367 *big_buf_size = MJUM9BYTES;
3368 *nbufs = 1;
3369 #endif
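/*
 * Worked example for the sizing above, assuming the typical values
 * MCLBYTES 2048 and MJUMPAGESIZE 4096 (bufsize = mtu + 14 + 4 + 2):
 *
 *   mtu 1500 -> bufsize 1520 -> one 2KB cluster, nbufs 1
 *   mtu 4000 -> bufsize 4020 -> one 4KB cluster, nbufs 1
 *   mtu 9000 -> bufsize 9020 -> one 9KB cluster, nbufs 1, or with
 *               MXGE_VIRT_JUMBOS 4KB chunks and nbufs 9000/4096+1 = 3,
 *               rounded up to 4 to keep it a power of two
 */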
3372 static int
3373 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3375 mxge_softc_t *sc;
3376 mxge_cmd_t cmd;
3377 bus_dmamap_t map;
3378 struct lro_entry *lro_entry;
3379 int err, i, slice;
3382 sc = ss->sc;
3383 slice = ss - sc->ss;
3385 SLIST_INIT(&ss->lro_free);
3386 SLIST_INIT(&ss->lro_active);
3388 for (i = 0; i < sc->lro_cnt; i++) {
3389 lro_entry = (struct lro_entry *)
3390 kmalloc(sizeof (*lro_entry), M_DEVBUF,
3391 M_NOWAIT | M_ZERO);
3392 if (lro_entry == NULL) {
3393 sc->lro_cnt = i;
3394 break;
3396 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3398 /* get the lanai pointers to the send and receive rings */
3400 err = 0;
3401 #ifndef IFNET_BUF_RING
3402 /* We currently only send from the first slice */
3403 if (slice == 0) {
3404 #endif
3405 cmd.data0 = slice;
3406 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3407 ss->tx.lanai =
3408 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3409 ss->tx.send_go = (volatile uint32_t *)
3410 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3411 ss->tx.send_stop = (volatile uint32_t *)
3412 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3413 #ifndef IFNET_BUF_RING
3415 #endif
3416 cmd.data0 = slice;
3417 err |= mxge_send_cmd(sc,
3418 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3419 ss->rx_small.lanai =
3420 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3421 cmd.data0 = slice;
3422 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3423 ss->rx_big.lanai =
3424 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3426 if (err != 0) {
3427 device_printf(sc->dev,
3428 "failed to get ring sizes or locations\n");
3429 return EIO;
3432 /* stock receive rings */
3433 for (i = 0; i <= ss->rx_small.mask; i++) {
3434 map = ss->rx_small.info[i].map;
3435 err = mxge_get_buf_small(ss, map, i);
3436 if (err) {
3437 device_printf(sc->dev, "alloced %d/%d smalls\n",
3438 i, ss->rx_small.mask + 1);
3439 return ENOMEM;
3442 for (i = 0; i <= ss->rx_big.mask; i++) {
3443 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3444 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3446 ss->rx_big.nbufs = nbufs;
3447 ss->rx_big.cl_size = cl_size;
3448 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3449 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
3450 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3451 map = ss->rx_big.info[i].map;
3452 err = mxge_get_buf_big(ss, map, i);
3453 if (err) {
3454 device_printf(sc->dev, "alloced %d/%d bigs\n",
3455 i, ss->rx_big.mask + 1);
3456 return ENOMEM;
3459 return 0;
3462 static int
3463 mxge_open(mxge_softc_t *sc)
3465 mxge_cmd_t cmd;
3466 int err, big_bytes, nbufs, slice, cl_size, i;
3467 bus_addr_t bus;
3468 volatile uint8_t *itable;
3469 struct mxge_slice_state *ss;
3471 /* Copy the MAC address in case it was overridden */
3472 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3474 err = mxge_reset(sc, 1);
3475 if (err != 0) {
3476 device_printf(sc->dev, "failed to reset\n");
3477 return EIO;
3480 if (sc->num_slices > 1) {
3481 /* setup the indirection table */
3482 cmd.data0 = sc->num_slices;
3483 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3484 &cmd);
3486 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3487 &cmd);
3488 if (err != 0) {
3489 device_printf(sc->dev,
3490 "failed to setup rss tables\n");
3491 return err;
3494 /* just enable an identity mapping */
3495 itable = sc->sram + cmd.data0;
3496 for (i = 0; i < sc->num_slices; i++)
3497 itable[i] = (uint8_t)i;
3499 cmd.data0 = 1;
3500 cmd.data1 = mxge_rss_hash_type;
3501 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3502 if (err != 0) {
3503 device_printf(sc->dev, "failed to enable slices\n");
3504 return err;
3509 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3511 cmd.data0 = nbufs;
3512 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3513 &cmd);
3514 /* error is only meaningful if we're trying to set
3515 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3516 if (err && nbufs > 1) {
3517 device_printf(sc->dev,
3518 "Failed to set alway-use-n to %d\n",
3519 nbufs);
3520 return EIO;
3522 /* Give the firmware the mtu and the big and small buffer
3523 sizes. The firmware wants the big buf size to be a power
3524 of two. Luckily, FreeBSD's clusters are powers of two */
3525 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3526 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3527 cmd.data0 = MHLEN - MXGEFW_PAD;
3528 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3529 &cmd);
3530 cmd.data0 = big_bytes;
3531 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3533 if (err != 0) {
3534 device_printf(sc->dev, "failed to setup params\n");
3535 goto abort;
3538 /* Now give him the pointer to the stats block */
3539 for (slice = 0;
3540 #ifdef IFNET_BUF_RING
3541 slice < sc->num_slices;
3542 #else
3543 slice < 1;
3544 #endif
3545 slice++) {
3546 ss = &sc->ss[slice];
3547 cmd.data0 =
3548 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3549 cmd.data1 =
3550 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3551 cmd.data2 = sizeof(struct mcp_irq_data);
3552 cmd.data2 |= (slice << 16);
3553 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3556 if (err != 0) {
3557 bus = sc->ss->fw_stats_dma.bus_addr;
3558 bus += offsetof(struct mcp_irq_data, send_done_count);
3559 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3560 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3561 err = mxge_send_cmd(sc,
3562 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3563 &cmd);
3564 /* Firmware cannot support multicast without STATS_DMA_V2 */
3565 sc->fw_multicast_support = 0;
3566 } else {
3567 sc->fw_multicast_support = 1;
3570 if (err != 0) {
3571 device_printf(sc->dev, "failed to setup params\n");
3572 goto abort;
3575 for (slice = 0; slice < sc->num_slices; slice++) {
3576 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3577 if (err != 0) {
3578 device_printf(sc->dev, "couldn't open slice %d\n",
3579 slice);
3580 goto abort;
3584 /* Finally, start the firmware running */
3585 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3586 if (err) {
3587 device_printf(sc->dev, "Couldn't bring up link\n");
3588 goto abort;
3590 #ifdef IFNET_BUF_RING
3591 for (slice = 0; slice < sc->num_slices; slice++) {
3592 ss = &sc->ss[slice];
3593 ss->if_drv_flags |= IFF_DRV_RUNNING;
3594 ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
3596 #endif
3597 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
3598 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3599 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3601 return 0;
3604 abort:
3605 mxge_free_mbufs(sc);
3607 return err;
3610 static int
3611 mxge_close(mxge_softc_t *sc)
3613 mxge_cmd_t cmd;
3614 int err, old_down_cnt;
3615 #ifdef IFNET_BUF_RING
3616 struct mxge_slice_state *ss;
3617 int slice;
3618 #endif
3620 callout_stop(&sc->co_hdl);
3621 #ifdef IFNET_BUF_RING
3622 for (slice = 0; slice < sc->num_slices; slice++) {
3623 ss = &sc->ss[slice];
3624 ss->if_drv_flags &= ~IFF_DRV_RUNNING;
3626 #endif
3627 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3628 old_down_cnt = sc->down_cnt;
3629 wmb();
3630 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3631 if (err) {
3632 device_printf(sc->dev, "Couldn't bring down link\n");
3634 if (old_down_cnt == sc->down_cnt) {
3635 /* wait for down irq */
3636 DELAY(10 * sc->intr_coal_delay);
3638 wmb();
3639 if (old_down_cnt == sc->down_cnt) {
3640 device_printf(sc->dev, "never got down irq\n");
3643 mxge_free_mbufs(sc);
3645 return 0;
3648 static void
3649 mxge_setup_cfg_space(mxge_softc_t *sc)
3651 device_t dev = sc->dev;
3652 int reg;
3653 uint16_t cmd, lnk, pectl;
3655 /* find the PCIe link width and set max read request to 4KB */
3656 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3657 lnk = pci_read_config(dev, reg + 0x12, 2);
3658 sc->link_width = (lnk >> 4) & 0x3f;
3660 pectl = pci_read_config(dev, reg + 0x8, 2);
3661 pectl = (pectl & ~0x7000) | (5 << 12);
3662 pci_write_config(dev, reg + 0x8, pectl, 2);
3665 /* Enable DMA and Memory space access */
3666 pci_enable_busmaster(dev);
3667 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3668 cmd |= PCIM_CMD_MEMEN;
3669 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3672 static uint32_t
3673 mxge_read_reboot(mxge_softc_t *sc)
3675 device_t dev = sc->dev;
3676 uint32_t vs;
3678 /* find the vendor specific offset */
3679 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3680 device_printf(sc->dev,
3681 "could not find vendor specific offset\n");
3682 return (uint32_t)-1;
3684 /* enable read32 mode */
3685 pci_write_config(dev, vs + 0x10, 0x3, 1);
3686 /* tell NIC which register to read */
3687 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3688 return (pci_read_config(dev, vs + 0x14, 4));
3691 static int
3692 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3694 struct pci_devinfo *dinfo;
3695 mxge_tx_ring_t *tx;
3696 int err;
3697 uint32_t reboot;
3698 uint16_t cmd;
3700 err = ENXIO;
3702 device_printf(sc->dev, "Watchdog reset!\n");
3705 * check to see if the NIC rebooted. If it did, then all of
3706 * PCI config space has been reset, and things like the
3707 * busmaster bit will be zero. If this is the case, then we
3708 * must restore PCI config space before the NIC can be used
3709 * again
3711 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3712 if (cmd == 0xffff) {
3714 * maybe the watchdog caught the NIC rebooting; wait
3715 * up to 100ms for it to finish. If it does not come
3716 * back, then give up
3718 DELAY(1000*100);
3719 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3720 if (cmd == 0xffff) {
3721 device_printf(sc->dev, "NIC disappeared!\n");
3722 return (err);
3725 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3726 /* print the reboot status */
3727 reboot = mxge_read_reboot(sc);
3728 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3729 reboot);
3730 /* restore PCI configuration space */
3731 dinfo = device_get_ivars(sc->dev);
3732 pci_cfg_restore(sc->dev, dinfo);
3734 /* and redo any changes we made to our config space */
3735 mxge_setup_cfg_space(sc);
3737 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
3738 mxge_close(sc);
3739 err = mxge_open(sc);
3741 } else {
3742 tx = &sc->ss[slice].tx;
3743 device_printf(sc->dev,
3744 "NIC did not reboot, slice %d ring state:\n",
3745 slice);
3746 device_printf(sc->dev,
3747 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3748 tx->req, tx->done, tx->queue_active);
3749 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3750 tx->activate, tx->deactivate);
3751 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3752 tx->pkt_done,
3753 be32toh(sc->ss->fw_stats->send_done_count));
3754 device_printf(sc->dev, "not resetting\n");
3756 return (err);
3759 static int
3760 mxge_watchdog(mxge_softc_t *sc)
3762 mxge_tx_ring_t *tx;
3763 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3764 int i, err = 0;
3766 /* see if we have outstanding transmits, which
3767 have been pending for more than mxge_ticks */
3768 for (i = 0;
3769 #ifdef IFNET_BUF_RING
3770 (i < sc->num_slices) && (err == 0);
3771 #else
3772 (i < 1) && (err == 0);
3773 #endif
3774 i++) {
3775 tx = &sc->ss[i].tx;
3776 if (tx->req != tx->done &&
3777 tx->watchdog_req != tx->watchdog_done &&
3778 tx->done == tx->watchdog_done) {
3779 /* check for pause blocking before resetting */
3780 if (tx->watchdog_rx_pause == rx_pause)
3781 err = mxge_watchdog_reset(sc, i);
3782 else
3783 device_printf(sc->dev, "Flow control blocking "
3784 "xmits, check link partner\n");
3787 tx->watchdog_req = tx->req;
3788 tx->watchdog_done = tx->done;
3789 tx->watchdog_rx_pause = rx_pause;
3792 if (sc->need_media_probe)
3793 mxge_media_probe(sc);
3794 return (err);
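/*
 * Illustrative sketch, compiled out: the stall test used by
 * mxge_watchdog() above. A ring is considered hung only when it has
 * pending work now, also had pending work at the previous tick, and
 * the completion index has not moved since then. The sketch_ snapshot
 * struct is hypothetical.
 */
#if 0
#include <stdint.h>
#include <stdbool.h>

struct sketch_snap { uint32_t req, done; };

static bool
sketch_ring_hung(uint32_t req, uint32_t done, struct sketch_snap *prev)
{
	bool hung = (req != done) &&		/* work outstanding */
	    (prev->req != prev->done) &&	/* ... at the last tick too */
	    (done == prev->done);		/* and no progress since */

	prev->req = req;
	prev->done = done;
	return hung;
}
#endif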
3797 static void
3798 mxge_update_stats(mxge_softc_t *sc)
3800 struct mxge_slice_state *ss;
3801 u_long ipackets = 0;
3802 u_long opackets = 0;
3803 #ifdef IFNET_BUF_RING
3804 u_long obytes = 0;
3805 u_long omcasts = 0;
3806 u_long odrops = 0;
3807 #endif
3808 u_long oerrors = 0;
3809 int slice;
3811 for (slice = 0; slice < sc->num_slices; slice++) {
3812 ss = &sc->ss[slice];
3813 ipackets += ss->ipackets;
3814 opackets += ss->opackets;
3815 #ifdef IFNET_BUF_RING
3816 obytes += ss->obytes;
3817 omcasts += ss->omcasts;
3818 odrops += ss->tx.br->br_drops;
3819 #endif
3820 oerrors += ss->oerrors;
3822 sc->ifp->if_ipackets = ipackets;
3823 sc->ifp->if_opackets = opackets;
3824 #ifdef IFNET_BUF_RING
3825 sc->ifp->if_obytes = obytes;
3826 sc->ifp->if_omcasts = omcasts;
3827 sc->ifp->if_snd.ifq_drops = odrops;
3828 #endif
3829 sc->ifp->if_oerrors = oerrors;
3832 static void
3833 mxge_tick(void *arg)
3835 mxge_softc_t *sc = arg;
3836 int err = 0;
3838 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3839 /* aggregate stats from different slices */
3840 mxge_update_stats(sc);
3841 if (!sc->watchdog_countdown) {
3842 err = mxge_watchdog(sc);
3843 sc->watchdog_countdown = 4;
3845 sc->watchdog_countdown--;
3846 if (err == 0)
3847 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3848 lockmgr(&sc->driver_lock, LK_RELEASE);
3851 static int
3852 mxge_media_change(struct ifnet *ifp)
3854 return EINVAL;
3857 static int
3858 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3860 struct ifnet *ifp = sc->ifp;
3861 int real_mtu, old_mtu;
3862 int err = 0;
3865 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3866 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3867 return EINVAL;
3868 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3869 old_mtu = ifp->if_mtu;
3870 ifp->if_mtu = mtu;
3871 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3872 mxge_close(sc);
3873 err = mxge_open(sc);
3874 if (err != 0) {
3875 ifp->if_mtu = old_mtu;
3876 mxge_close(sc);
3877 (void) mxge_open(sc);
3880 lockmgr(&sc->driver_lock, LK_RELEASE);
3881 return err;
3884 static void
3885 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3887 mxge_softc_t *sc = ifp->if_softc;
3890 if (sc == NULL)
3891 return;
3892 ifmr->ifm_status = IFM_AVALID;
3893 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3894 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3895 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3898 static int
3899 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3901 mxge_softc_t *sc = ifp->if_softc;
3902 struct ifreq *ifr = (struct ifreq *)data;
3903 int err, mask;
3905 (void)cr;
3906 err = 0;
3907 switch (command) {
3908 case SIOCSIFADDR:
3909 case SIOCGIFADDR:
3910 err = ether_ioctl(ifp, command, data);
3911 break;
3913 case SIOCSIFMTU:
3914 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3915 break;
3917 case SIOCSIFFLAGS:
3918 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3919 if (sc->dying) {
3920 lockmgr(&sc->driver_lock, LK_RELEASE);
3921 return EINVAL;
3923 if (ifp->if_flags & IFF_UP) {
3924 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3925 err = mxge_open(sc);
3926 } else {
3927 /* take care of promisc and allmulti
3928 flag changes */
3929 mxge_change_promisc(sc,
3930 ifp->if_flags & IFF_PROMISC);
3931 mxge_set_multicast_list(sc);
3933 } else {
3934 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3935 mxge_close(sc);
3938 lockmgr(&sc->driver_lock, LK_RELEASE);
3939 break;
3941 case SIOCADDMULTI:
3942 case SIOCDELMULTI:
3943 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3944 mxge_set_multicast_list(sc);
3945 lockmgr(&sc->driver_lock, LK_RELEASE);
3946 break;
3948 case SIOCSIFCAP:
3949 lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
3950 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3951 if (mask & IFCAP_TXCSUM) {
3952 if (IFCAP_TXCSUM & ifp->if_capenable) {
3953 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3954 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3955 | CSUM_TSO);
3956 } else {
3957 ifp->if_capenable |= IFCAP_TXCSUM;
3958 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3960 } else if (mask & IFCAP_RXCSUM) {
3961 if (IFCAP_RXCSUM & ifp->if_capenable) {
3962 ifp->if_capenable &= ~IFCAP_RXCSUM;
3963 sc->csum_flag = 0;
3964 } else {
3965 ifp->if_capenable |= IFCAP_RXCSUM;
3966 sc->csum_flag = 1;
3969 if (mask & IFCAP_TSO4) {
3970 if (IFCAP_TSO4 & ifp->if_capenable) {
3971 ifp->if_capenable &= ~IFCAP_TSO4;
3972 ifp->if_hwassist &= ~CSUM_TSO;
3973 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
3974 ifp->if_capenable |= IFCAP_TSO4;
3975 ifp->if_hwassist |= CSUM_TSO;
3976 } else {
3977 kprintf("mxge requires tx checksum offload"
3978 " be enabled to use TSO\n");
3979 err = EINVAL;
3982 if (mask & IFCAP_LRO) {
3983 if (IFCAP_LRO & ifp->if_capenable)
3984 err = mxge_change_lro_locked(sc, 0);
3985 else
3986 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
3988 if (mask & IFCAP_VLAN_HWTAGGING)
3989 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3990 lockmgr(&sc->driver_lock, LK_RELEASE);
3991 VLAN_CAPABILITIES(ifp);
3993 break;
3995 case SIOCGIFMEDIA:
3996 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
3997 &sc->media, command);
3998 break;
4000 default:
4001 err = ENOTTY;
4003 return err;
4006 static void
4007 mxge_fetch_tunables(mxge_softc_t *sc)
4010 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4011 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4012 &mxge_flow_control);
4013 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4014 &mxge_intr_coal_delay);
4015 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4016 &mxge_nvidia_ecrc_enable);
4017 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4018 &mxge_force_firmware);
4019 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4020 &mxge_deassert_wait);
4021 TUNABLE_INT_FETCH("hw.mxge.verbose",
4022 &mxge_verbose);
4023 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4024 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4025 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4026 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4027 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4028 if (sc->lro_cnt != 0)
4029 mxge_lro_cnt = sc->lro_cnt;
4031 if (bootverbose)
4032 mxge_verbose = 1;
4033 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4034 mxge_intr_coal_delay = 30;
4035 if (mxge_ticks == 0)
4036 mxge_ticks = hz / 2;
4037 sc->pause = mxge_flow_control;
4038 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4039 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4040 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4042 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4043 mxge_initial_mtu < ETHER_MIN_LEN)
4044 mxge_initial_mtu = ETHERMTU_JUMBO;
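/*
 * The knobs fetched above are loader tunables; they can be set at
 * boot, e.g. in /boot/loader.conf (the values below are examples
 * only):
 *
 *   hw.mxge.max_slices="4"
 *   hw.mxge.intr_coal_delay="30"
 *   hw.mxge.flow_control_enabled="1"
 */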
4048 static void
4049 mxge_free_slices(mxge_softc_t *sc)
4051 struct mxge_slice_state *ss;
4052 int i;
4055 if (sc->ss == NULL)
4056 return;
4058 for (i = 0; i < sc->num_slices; i++) {
4059 ss = &sc->ss[i];
4060 if (ss->fw_stats != NULL) {
4061 mxge_dma_free(&ss->fw_stats_dma);
4062 ss->fw_stats = NULL;
4063 #ifdef IFNET_BUF_RING
4064 if (ss->tx.br != NULL) {
4065 drbr_free(ss->tx.br, M_DEVBUF);
4066 ss->tx.br = NULL;
4068 #endif
4069 lockuninit(&ss->tx.lock);
4071 if (ss->rx_done.entry != NULL) {
4072 mxge_dma_free(&ss->rx_done.dma);
4073 ss->rx_done.entry = NULL;
4076 kfree(sc->ss, M_DEVBUF);
4077 sc->ss = NULL;
4080 static int
4081 mxge_alloc_slices(mxge_softc_t *sc)
4083 mxge_cmd_t cmd;
4084 struct mxge_slice_state *ss;
4085 size_t bytes;
4086 int err, i, max_intr_slots;
4088 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4089 if (err != 0) {
4090 device_printf(sc->dev, "Cannot determine rx ring size\n");
4091 return err;
4093 sc->rx_ring_size = cmd.data0;
4094 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4096 bytes = sizeof (*sc->ss) * sc->num_slices;
4097 sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4098 if (sc->ss == NULL)
4099 return (ENOMEM);
4100 for (i = 0; i < sc->num_slices; i++) {
4101 ss = &sc->ss[i];
4103 ss->sc = sc;
4105 /* allocate per-slice rx interrupt queues */
4107 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4108 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4109 if (err != 0)
4110 goto abort;
4111 ss->rx_done.entry = ss->rx_done.dma.addr;
4112 bzero(ss->rx_done.entry, bytes);
4115 * allocate the per-slice firmware stats; stats
4116 * (including tx) are used only on the first
4117 * slice for now
4119 #ifndef IFNET_BUF_RING
4120 if (i > 0)
4121 continue;
4122 #endif
4124 bytes = sizeof (*ss->fw_stats);
4125 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4126 sizeof (*ss->fw_stats), 64);
4127 if (err != 0)
4128 goto abort;
4129 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4130 ksnprintf(ss->tx.lock_name, sizeof(ss->tx.lock_name),
4131 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
4132 lockinit(&ss->tx.lock, ss->tx.lock_name, 0, LK_CANRECURSE);
4133 #ifdef IFNET_BUF_RING
4134 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4135 &ss->tx.lock);
4136 #endif
4139 return (0);
4141 abort:
4142 mxge_free_slices(sc);
4143 return (ENOMEM);
4146 static void
4147 mxge_slice_probe(mxge_softc_t *sc)
4149 mxge_cmd_t cmd;
4150 char *old_fw;
4151 int msix_cnt, status, max_intr_slots;
4153 sc->num_slices = 1;
4155 * don't enable multiple slices unless they were requested,
4156 * or if this is not an SMP system
4159 if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2)
4160 return;
4162 /* see how many MSI-X interrupts are available */
4163 msix_cnt = pci_msix_count(sc->dev);
4164 if (msix_cnt < 2)
4165 return;
4167 /* now load the slice-aware firmware and see what it supports */
4168 old_fw = sc->fw_name;
4169 if (old_fw == mxge_fw_aligned)
4170 sc->fw_name = mxge_fw_rss_aligned;
4171 else
4172 sc->fw_name = mxge_fw_rss_unaligned;
4173 status = mxge_load_firmware(sc, 0);
4174 if (status != 0) {
4175 device_printf(sc->dev, "Falling back to a single slice\n");
4176 return;
4179 /* try to send a reset command to the card to see if it
4180 is alive */
4181 memset(&cmd, 0, sizeof (cmd));
4182 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4183 if (status != 0) {
4184 device_printf(sc->dev, "failed reset\n");
4185 goto abort_with_fw;
4188 /* get rx ring size */
4189 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4190 if (status != 0) {
4191 device_printf(sc->dev, "Cannot determine rx ring size\n");
4192 goto abort_with_fw;
4194 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4196 /* tell it the size of the interrupt queues */
4197 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4198 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4199 if (status != 0) {
4200 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4201 goto abort_with_fw;
4204 /* ask the maximum number of slices it supports */
4205 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4206 if (status != 0) {
4207 device_printf(sc->dev,
4208 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4209 goto abort_with_fw;
4211 sc->num_slices = cmd.data0;
4212 if (sc->num_slices > msix_cnt)
4213 sc->num_slices = msix_cnt;
4215 if (mxge_max_slices == -1) {
4216 /* cap to number of CPUs in system */
4217 if (sc->num_slices > ncpus)
4218 sc->num_slices = ncpus;
4219 } else {
4220 if (sc->num_slices > mxge_max_slices)
4221 sc->num_slices = mxge_max_slices;
4223 /* make sure it is a power of two */
4224 while (sc->num_slices & (sc->num_slices - 1))
4225 sc->num_slices--;
4227 if (mxge_verbose)
4228 device_printf(sc->dev, "using %d slices\n",
4229 sc->num_slices);
4231 return;
4233 abort_with_fw:
4234 sc->fw_name = old_fw;
4235 (void) mxge_load_firmware(sc, 0);
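
/*
 * mxge_add_msix_irqs: interrupt setup for multi-slice mode.  The MSI-X
 * table sits behind BAR 2; allocate one vector per slice and hook
 * mxge_intr to each vector with that slice's state as its argument.
 */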
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			kprintf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		kprintf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
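
/*
 * mxge_add_single_irq: single-slice fallback.  Prefer MSI when exactly
 * one message is available; otherwise use the shared legacy INTx line
 * (rid 0).
 */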
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
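
/* teardown counterparts for the two interrupt setup paths above */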
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);
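
	/*
	 * note the "0 &&": this MSI-X re-setup pass is deliberately
	 * compiled out and appears to be kept only as a debugging aid
	 */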
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
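
/*
 * mxge_attach: map the board, parse its EEPROM strings, load and reset
 * the firmware, size the slices, then allocate rings and interrupts and
 * attach the ethernet interface.
 */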
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid;

	/*
	 * avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	ksnprintf(sc->cmd_lock_name, sizeof(sc->cmd_lock_name), "%s:cmd",
		  device_get_nameunit(dev));
	lockinit(&sc->cmd_lock, sc->cmd_lock_name, 0, LK_CANRECURSE);
	ksnprintf(sc->driver_lock_name, sizeof(sc->driver_lock_name),
		  "%s:drv", device_get_nameunit(dev));
	lockinit(&sc->driver_lock, sc->driver_lock_name,
		 0, LK_CANRECURSE);

	callout_init(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
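	/*
	 * total SRAM minus space that appears reserved at the top (two
	 * 48KB regions and one 32KB region) and a 256-byte pad; the
	 * layout is inferred from the constants, not documented here
	 */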
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
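	/*
	 * reading two bytes short of the buffer leaves the terminating
	 * zeroes from the bzero above in place, so the string list stays
	 * double-NUL terminated
	 */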
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d. Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	return 0;
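
	/* error unwind: labels release resources in reverse allocation order */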
abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	lockuninit(&sc->cmd_lock);
	lockuninit(&sc->driver_lock);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}
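
/*
 * mxge_detach: refuse to detach while VLANs are still configured;
 * otherwise close the interface and release resources in roughly the
 * reverse order of mxge_attach.
 */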
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	if (mxge_vlans_active(sc)) {
		device_printf(sc->dev,
			      "Detach vlans before removing module\n");
		return EBUSY;
	}
	lockmgr(&sc->driver_lock, LK_EXCLUSIVE);
	sc->dying = 1;
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	lockmgr(&sc->driver_lock, LK_RELEASE);
	ether_ifdetach(sc->ifp);
	callout_drain(&sc->co_hdl);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	lockuninit(&sc->cmd_lock);
	lockuninit(&sc->driver_lock);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/