mxge: document jumbo buffer hack
sys/dev/netif/mxge/if_mxge.c (dragonfly.git)
/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
static int mxge_initial_mtu = ETHERMTU_JUMBO;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);
static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

/* XXX: we don't have Large Receive Offload support yet */
inline int
mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
{
	(void)ss;
	(void)m_head;
	(void)csum;
	return 1;
}

inline void
mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
{
	(void)ss;
	(void)lro;
}
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if 0
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
#else
	sc->wc = 0;	/* TBD: PAT support */
#endif
}
/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
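/*
 * For illustration (the MAC shown is made up): given the string
 * "MAC=00:60:dd:47:ab:cd", the parser below first advances ptr by 1,
 * so that the "ptr += 3" at the top of each loop iteration lands on
 * the hex pairs "00", "60", "dd", ... which strtoul() then converts
 * one byte at a time, stopping at each ':'.
 */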
static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);
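	/*
	 * The arithmetic above matches the standard PCIe memory-mapped
	 * config layout: 4KB of config space per function, 8 functions
	 * per slot, 1MB per bus, i.e.
	 *   off = base + (bus << 20) + (slot << 15) + (func << 12).
	 * As a worked example (illustrative numbers only), bus 0,
	 * slot 9, func 0 lands at base + 0x48000.
	 */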
	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */
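	/*
	 * Worked example with made-up numbers: if cmd.data0 came back
	 * as 0x006400c8, that is 0x64 = 100 transfers in 0xc8 = 200
	 * half-microsecond ticks (100us total).  With len = 2048, the
	 * read_dma formula below gives (100 * 2048 * 2) / 200 = 2048,
	 * i.e. ~2048 MB/s, since bytes per microsecond is numerically
	 * megabytes per second.
	 */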
	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */
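/*
 * To summarize the policy implemented below:
 *
 *   completions known to be aligned -> mxge_fw_aligned,   tx_boundary 4096
 *   unaligned or unverifiable       -> mxge_fw_unaligned, tx_boundary 2048
 *
 * mxge_firmware_probe() performs the detection; the forced-firmware
 * and narrow-link shortcuts live in mxge_select_firmware().
 */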
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up-to-date fw\n");
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
#if 0
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}
#endif
static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	struct fw_image *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_image_load(sc->fw_name, NULL);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
#if 0
	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}
#endif
	fw_len = fw->fw_imglen;
	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw->fw_image + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file\n");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw->fw_image + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      fw->fw_image + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
#if 0
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
#endif
abort_with_fw:
	firmware_image_unload(fw);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */
static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)\n",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);

	lwkt_serialize_enter(sc->ifp->if_serializer);

	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}
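/*
 * A minimal usage sketch for mxge_send_cmd() (illustrative only, kept
 * out of the build): callers zero a mxge_cmd_t, fill in data0..data2
 * as a command requires, and read any reply back from data0.  This
 * mirrors what mxge_reset() does.
 */
#if 0
	mxge_cmd_t cmd;
	int status;

	memset(&cmd, 0, sizeof(cmd));	/* MXGEFW_CMD_RESET takes no args */
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0)
		device_printf(sc->dev, "failed reset\n");
#endif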
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "handoff failed (%p = 0x%x)\n",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	lwkt_serialize_enter(ifp->if_serializer);
	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status: "
				      "%d\n", err);
			/* abort, leaving multicast filtering off */
			lwkt_serialize_exit(ifp->if_serializer);
			return;
		}
	}
	lwkt_serialize_exit(ifp->if_serializer);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
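/*
 * A note on the jumbo buffer handling probed above (interpretation of
 * the code, not firmware documentation): if the firmware accepts
 * MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, it can apparently assemble a
 * received jumbo frame from several big buffers (the "virtually
 * contiguous jumbos" mentioned above), so the MTU is not limited to
 * what fits in a single MJUMPAGESIZE cluster.
 */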
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_pause(sc, enabled);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->sysctl_tree != NULL) {
		sysctl_ctx_free(&sc->sysctl_ctx);
		sc->sysctl_tree = NULL;
	}
	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
					  OID_AUTO,
					  device_get_nameunit(sc->dev),
					  CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable flow control (pause frames)");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */
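/*
 * The idea behind the flags dance below: the NIC treats a non-zero
 * flags byte in the first request as "this chain is ready", so the
 * flags are cleared before the PIO copy and only re-written, as the
 * last 32-bit word of the first request, once the whole chain is in
 * place.
 */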
static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		/* the headers now live in the scratch buffer, so parse
		   them from there rather than from the (short) mbuf */
		ip = (struct ip *)(ss->scratch + ip_off);
	}
	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */
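	/*
	 * Concretely: "(req - rdma_count)->rdma_count = rdma_count + 1"
	 * at the top of the inner loop below is the retroactive fill-in
	 * described above.  It reaches back to the first request after
	 * the most recent cut (or the first request of the packet) and
	 * stamps it with the number of RDMAs accumulated since.
	 */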
	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		kprintf("tx->max_desc exceeded via TSO!\n");
		kprintf("mss = %d, %ld, %d!\n", mss,
			(long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, EVL_ENCAPLEN, MB_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + EVL_ENCAPLEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
1997 static void
1998 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2000 mxge_softc_t *sc;
2001 mcp_kreq_ether_send_t *req;
2002 bus_dma_segment_t *seg;
2003 struct mbuf *m_tmp;
2004 struct ifnet *ifp;
2005 mxge_tx_ring_t *tx;
2006 struct ip *ip;
2007 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
2008 uint16_t pseudo_hdr_offset;
2009 uint8_t flags, cksum_offset;
2012 sc = ss->sc;
2013 ifp = sc->ifp;
2014 tx = &ss->tx;
2016 ip_off = sizeof (struct ether_header);
2017 #ifdef MXGE_NEW_VLAN_API
2018 if (m->m_flags & M_VLANTAG) {
2019 m = mxge_vlan_tag_insert(m);
2020 if (__predict_false(m == NULL))
2021 goto drop;
2022 ip_off += EVL_ENCAPLEN;
2024 #endif
2025 /* (try to) map the frame for DMA */
2026 idx = tx->req & tx->mask;
2027 err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map,
2028 m, tx->seg_list, 1, &cnt,
2029 BUS_DMA_NOWAIT);
2030 if (__predict_false(err == EFBIG)) {
2031 /* Too many segments in the chain. Try
2032 to defrag */
2033 m_tmp = m_defrag(m, M_NOWAIT);
2034 if (m_tmp == NULL) {
2035 goto drop;
2037 ss->tx.defrag++;
2038 m = m_tmp;
2039 err = bus_dmamap_load_mbuf_segment(tx->dmat,
2040 tx->info[idx].map,
2041 m, tx->seg_list, 1, &cnt,
2042 BUS_DMA_NOWAIT);
2044 if (__predict_false(err != 0)) {
2045 device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d"
2046 " packet len = %d\n", err, m->m_pkthdr.len);
2047 goto drop;
2049 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2050 BUS_DMASYNC_PREWRITE);
2051 tx->info[idx].m = m;
2053 #if IFCAP_TSO4
2054 /* TSO is different enough, we handle it in another routine */
2055 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2056 mxge_encap_tso(ss, m, cnt, ip_off);
2057 return;
2059 #endif
2061 req = tx->req_list;
2062 cksum_offset = 0;
2063 pseudo_hdr_offset = 0;
2064 flags = MXGEFW_FLAGS_NO_TSO;
2066 /* checksum offloading? */
2067 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2068 /* ensure ip header is in first mbuf, copy
2069 it to a scratch buffer if not */
2070 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2071 m_copydata(m, 0, ip_off + sizeof (*ip),
2072 ss->scratch);
2073 ip = (struct ip *)(ss->scratch + ip_off);
2074 } else {
2075 ip = (struct ip *)(mtod(m, char *) + ip_off);
2077 cksum_offset = ip_off + (ip->ip_hl << 2);
2078 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2079 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
2080 req->cksum_offset = cksum_offset;
2081 flags |= MXGEFW_FLAGS_CKSUM;
2082 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2083 } else {
2084 odd_flag = 0;
2086 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2087 flags |= MXGEFW_FLAGS_SMALL;
2089 /* convert segments into a request list */
2090 cum_len = 0;
2091 seg = tx->seg_list;
2092 req->flags = MXGEFW_FLAGS_FIRST;
2093 for (i = 0; i < cnt; i++) {
2094 req->addr_low =
2095 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2096 req->addr_high =
2097 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2098 req->length = htobe16(seg->ds_len);
2099 req->cksum_offset = cksum_offset;
2100 if (cksum_offset > seg->ds_len)
2101 cksum_offset -= seg->ds_len;
2102 else
2103 cksum_offset = 0;
2104 req->pseudo_hdr_offset = pseudo_hdr_offset;
2105 req->pad = 0; /* complete solid 16-byte block */
2106 req->rdma_count = 1;
2107 req->flags |= flags | ((cum_len & 1) * odd_flag);
2108 cum_len += seg->ds_len;
2109 seg++;
2110 req++;
2111 req->flags = 0;
2113 req--;
2114 /* pad runts to 60 bytes */
2115 if (cum_len < 60) {
2116 req++;
2117 req->addr_low =
2118 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2119 req->addr_high =
2120 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2121 req->length = htobe16(60 - cum_len);
2122 req->cksum_offset = 0;
2123 req->pseudo_hdr_offset = pseudo_hdr_offset;
2124 req->pad = 0; /* complete solid 16-byte block */
2125 req->rdma_count = 1;
2126 req->flags |= flags | ((cum_len & 1) * odd_flag);
2127 cnt++;
2130 tx->req_list[0].rdma_count = cnt;
2131 #if 0
2132 /* print what the firmware will see */
2133 for (i = 0; i < cnt; i++) {
2134 kprintf("%d: addr: 0x%x 0x%x len:%d pso:%d,"
2135 "cso:%d, flags:0x%x, rdma:%d\n",
2136 i, (int)ntohl(tx->req_list[i].addr_high),
2137 (int)ntohl(tx->req_list[i].addr_low),
2138 (int)ntohs(tx->req_list[i].length),
2139 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2140 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2141 tx->req_list[i].rdma_count);
2143 kprintf("--------------\n");
2144 #endif
2145 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2146 mxge_submit_req(tx, tx->req_list, cnt);
2147 #ifdef IFNET_BUF_RING
2148 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2149 /* tell the NIC to start polling this slice */
2150 *tx->send_go = 1;
2151 tx->queue_active = 1;
2152 tx->activate++;
2153 wmb();
2155 #endif
2156 return;
2158 drop:
2159 m_freem(m);
2160 ss->oerrors++;
2161 return;
2164 #ifdef IFNET_BUF_RING
2165 static void
2166 mxge_qflush(struct ifnet *ifp)
2168 mxge_softc_t *sc = ifp->if_softc;
2169 mxge_tx_ring_t *tx;
2170 struct mbuf *m;
2171 int slice;
2173 for (slice = 0; slice < sc->num_slices; slice++) {
2174 tx = &sc->ss[slice].tx;
2175 lwkt_serialize_enter(sc->ifp->if_serializer);
2176 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2177 m_freem(m);
2178 lwkt_serialize_exit(sc->ifp->if_serializer);
2180 if_qflush(ifp);
2183 static inline void
2184 mxge_start_locked(struct mxge_slice_state *ss)
2186 mxge_softc_t *sc;
2187 struct mbuf *m;
2188 struct ifnet *ifp;
2189 mxge_tx_ring_t *tx;
2191 sc = ss->sc;
2192 ifp = sc->ifp;
2193 tx = &ss->tx;
2195 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2196 m = drbr_dequeue(ifp, tx->br);
2197 if (m == NULL) {
2198 return;
2200 /* let BPF see it */
2201 BPF_MTAP(ifp, m);
2203 /* give it to the nic */
2204 mxge_encap(ss, m);
2206 /* ran out of transmit slots */
2207 if (((ss->if_flags & IFF_OACTIVE) == 0)
2208 && (!drbr_empty(ifp, tx->br))) {
2209 ss->if_flags |= IFF_OACTIVE;
2210 tx->stall++;
2214 static int
2215 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2217 mxge_softc_t *sc;
2218 struct ifnet *ifp;
2219 mxge_tx_ring_t *tx;
2220 int err;
2222 sc = ss->sc;
2223 ifp = sc->ifp;
2224 tx = &ss->tx;
2226 if ((ss->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
2227 IFF_RUNNING) {
2228 err = drbr_enqueue(ifp, tx->br, m);
2229 return (err);
2232 if (drbr_empty(ifp, tx->br) &&
2233 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2234 /* let BPF see it */
2235 BPF_MTAP(ifp, m);
2236 /* give it to the nic */
2237 mxge_encap(ss, m);
2238 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2239 return (err);
2241 if (!drbr_empty(ifp, tx->br))
2242 mxge_start_locked(ss);
2243 return (0);
2246 static int
2247 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2249 mxge_softc_t *sc = ifp->if_softc;
2250 struct mxge_slice_state *ss;
2251 mxge_tx_ring_t *tx;
2252 int err = 0;
2253 int slice = 0; /* flowid steering is #if 0'd below; default to slice 0 */
2255 #if 0
2256 slice = m->m_pkthdr.flowid;
2257 #endif
2258 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2260 ss = &sc->ss[slice];
2261 tx = &ss->tx;
2263 if(lwkt_serialize_try(ifp->if_serializer)) {
2264 err = mxge_transmit_locked(ss, m);
2265 lwkt_serialize_exit(ifp->if_serializer);
2266 } else {
2267 err = drbr_enqueue(ifp, tx->br, m);
2270 return (err);
2273 #else
2275 static inline void
2276 mxge_start_locked(struct mxge_slice_state *ss)
2278 mxge_softc_t *sc;
2279 struct mbuf *m;
2280 struct ifnet *ifp;
2281 mxge_tx_ring_t *tx;
2283 sc = ss->sc;
2284 ifp = sc->ifp;
2285 tx = &ss->tx;
2286 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2287 m = ifq_dequeue(&ifp->if_snd, NULL);
2288 if (m == NULL) {
2289 return;
2291 /* let BPF see it */
2292 BPF_MTAP(ifp, m);
2294 /* give it to the nic */
2295 mxge_encap(ss, m);
2297 /* ran out of transmit slots */
2298 if ((sc->ifp->if_flags & IFF_OACTIVE) == 0) {
2299 sc->ifp->if_flags |= IFF_OACTIVE;
2300 tx->stall++;
2303 #endif
2304 static void
2305 mxge_start(struct ifnet *ifp)
2307 mxge_softc_t *sc = ifp->if_softc;
2308 struct mxge_slice_state *ss;
2310 /* only use the first slice for now */
2311 ss = &sc->ss[0];
2312 lwkt_serialize_enter(ifp->if_serializer);
2313 mxge_start_locked(ss);
2314 lwkt_serialize_exit(ifp->if_serializer);
2317 /*
2318 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2319 * at most 32 bytes at a time, so as to avoid involving the software
2320 * pio handler in the nic. We re-write the first segment's low
2321 * DMA address to mark it valid only after we write the entire chunk
2322 * in a burst.
2323 */
2324 static inline void
2325 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2326 mcp_kreq_ether_recv_t *src)
2328 uint32_t low;
2330 low = src->addr_low;
2331 src->addr_low = 0xffffffff;
2332 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2333 wmb();
2334 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2335 wmb();
2336 src->addr_low = low;
2337 dst->addr_low = low;
2338 wmb();
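/*
 * Note on the ordering trick above: the 8 receive descriptors go out
 * in two 32-byte PIO bursts, but the first descriptor's addr_low is
 * temporarily replaced with 0xffffffff, the same value the driver
 * uses to mark unstocked ring slots.  Only after both bursts have
 * been posted (and a wmb() issued) is the real addr_low written, so
 * the firmware can never act on a half-written group of 8.
 */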
2341 static int
2342 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2344 bus_dma_segment_t seg;
2345 struct mbuf *m;
2346 mxge_rx_ring_t *rx = &ss->rx_small;
2347 int cnt, err;
2349 m = m_gethdr(MB_DONTWAIT, MT_DATA);
2350 if (m == NULL) {
2351 rx->alloc_fail++;
2352 err = ENOBUFS;
2353 goto done;
2355 m->m_len = MHLEN;
2356 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2357 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2358 if (err != 0) {
2359 m_free(m);
2360 goto done;
2362 rx->info[idx].m = m;
2363 rx->shadow[idx].addr_low =
2364 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2365 rx->shadow[idx].addr_high =
2366 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2368 done:
2369 if ((idx & 7) == 7)
2370 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2371 return err;
2375 static int
2376 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2378 bus_dma_segment_t seg[3];
2379 struct mbuf *m;
2380 mxge_rx_ring_t *rx = &ss->rx_big;
2381 int cnt, err, i;
2383 if (rx->cl_size == MCLBYTES)
2384 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2385 else {
2386 #if 0
2387 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2388 #else
2389 /*
2390 * XXX: allocate normal sized buffers for big buffers.
2391 * We should be fine as long as we don't get any jumbo frames
2392 */
2393 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2394 #endif
2396 if (m == NULL) {
2397 rx->alloc_fail++;
2398 err = ENOBUFS;
2399 goto done;
2401 m->m_len = rx->mlen;
2402 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2403 seg, 1, &cnt, BUS_DMA_NOWAIT);
2404 if (err != 0) {
2405 m_free(m);
2406 goto done;
2408 rx->info[idx].m = m;
2409 rx->shadow[idx].addr_low =
2410 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2411 rx->shadow[idx].addr_high =
2412 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2414 #if MXGE_VIRT_JUMBOS
2415 for (i = 1; i < cnt; i++) {
2416 rx->shadow[idx + i].addr_low =
2417 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2418 rx->shadow[idx + i].addr_high =
2419 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2421 #endif
2423 done:
2424 for (i = 0; i < rx->nbufs; i++) {
2425 if ((idx & 7) == 7) {
2426 mxge_submit_8rx(&rx->lanai[idx - 7],
2427 &rx->shadow[idx - 7]);
2429 idx++;
2431 return err;
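/*
 * Note on the submit loop above: receive buffers are handed to the
 * firmware only in aligned groups of 8 (mxge_submit_8rx), so the loop
 * walks the nbufs slots this frame occupies and fires a burst each
 * time it crosses an (idx & 7) == 7 boundary.  With nbufs > 1 a
 * single big frame spans consecutive slots, which is why
 * mxge_slice_open() stocks this ring in idx += nbufs strides.
 */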
2434 /*
2435 * Myri10GE hardware checksums are not valid if the sender
2436 * padded the frame with non-zero padding. This is because
2437 * the firmware just does a simple 16-bit 1s complement
2438 * checksum across the entire frame, excluding the first 14
2439 * bytes. It is best to simply check the checksum and
2440 * tell the stack about it only if the checksum is good.
2441 */
2443 static inline uint16_t
2444 mxge_rx_csum(struct mbuf *m, int csum)
2446 struct ether_header *eh;
2447 struct ip *ip;
2448 uint16_t c;
2450 eh = mtod(m, struct ether_header *);
2452 /* only deal with IPv4 TCP & UDP for now */
2453 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2454 return 1;
2455 ip = (struct ip *)(eh + 1);
2456 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2457 ip->ip_p != IPPROTO_UDP))
2458 return 1;
2459 #ifdef INET
2460 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2461 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2462 (ip->ip_hl << 2) + ip->ip_p));
2463 #else
2464 c = 1;
2465 #endif
2466 c ^= 0xffff;
2467 return (c);
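/*
 * A sketch of the arithmetic above.  The firmware checksum is a raw
 * 16-bit one's complement sum over everything past the 14-byte
 * Ethernet header.  A valid IP header (checksum field included) sums
 * to 0xffff, which is congruent to 0 in one's complement, so its
 * contribution cancels and csum is effectively the sum of transport
 * header plus payload.  in_pseudo() then folds in the pseudo-header
 * fields: source, destination, protocol, and the transport length,
 * which is why (ip_hl << 2) is subtracted from ip_len.  For a frame
 * with a correct TCP/UDP checksum the grand total is 0xffff, so the
 * final c ^= 0xffff leaves 0, the "good" value the callers test for.
 */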
2470 static void
2471 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2473 struct ether_vlan_header *evl;
2474 struct ether_header *eh;
2475 uint32_t partial;
2477 evl = mtod(m, struct ether_vlan_header *);
2478 eh = mtod(m, struct ether_header *);
2480 /*
2481 * fix checksum by subtracting EVL_ENCAPLEN bytes
2482 * after what the firmware thought was the end of the ethernet
2483 * header.
2484 */
2486 /* put checksum into host byte order */
2487 *csum = ntohs(*csum);
2488 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2489 (*csum) += ~partial;
2490 (*csum) += ((*csum) < ~partial);
2491 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2492 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2494 /* restore checksum to network byte order;
2495 later consumers expect this */
2496 *csum = htons(*csum);
2498 /* save the tag */
2499 #ifdef MXGE_NEW_VLAN_API
2500 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2501 #else
2503 struct m_tag *mtag;
2504 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2505 MB_DONTWAIT);
2506 if (mtag == NULL)
2507 return;
2508 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2509 m_tag_prepend(m, mtag);
2512 #endif
2513 m->m_flags |= M_VLANTAG;
2515 /*
2516 * Remove the 802.1q header by copying the Ethernet
2517 * addresses over it and adjusting the beginning of
2518 * the data in the mbuf. The encapsulated Ethernet
2519 * type field is already in place.
2520 */
2521 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2522 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2523 m_adj(m, EVL_ENCAPLEN);
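/*
 * The fixup above is standard one's complement algebra: subtracting x
 * from a checksum is adding ~x and folding the carries back in.
 * partial is the 32-bit word at offset ETHER_HDR_LEN -- the VLAN tag
 * plus the encapsulated type -- which is exactly the 4 bytes that
 * leave the firmware-checksummed region once the 802.1q header is
 * stripped.  The two shift-and-add folds then collapse the 32-bit
 * accumulator back into a 16-bit one's complement sum.
 */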
2527 static inline void
2528 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2530 mxge_softc_t *sc;
2531 struct ifnet *ifp;
2532 struct mbuf *m;
2533 struct ether_header *eh;
2534 mxge_rx_ring_t *rx;
2535 bus_dmamap_t old_map;
2536 int idx;
2537 uint16_t tcpudp_csum;
2539 sc = ss->sc;
2540 ifp = sc->ifp;
2541 rx = &ss->rx_big;
2542 idx = rx->cnt & rx->mask;
2543 rx->cnt += rx->nbufs;
2544 /* save a pointer to the received mbuf */
2545 m = rx->info[idx].m;
2546 /* try to replace the received mbuf */
2547 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2548 /* drop the frame -- the old mbuf is re-cycled */
2549 ifp->if_ierrors++;
2550 return;
2553 /* unmap the received buffer */
2554 old_map = rx->info[idx].map;
2555 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2556 bus_dmamap_unload(rx->dmat, old_map);
2558 /* swap the bus_dmamap_t's */
2559 rx->info[idx].map = rx->extra_map;
2560 rx->extra_map = old_map;
2562 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2563 * aligned */
2564 m->m_data += MXGEFW_PAD;
2566 m->m_pkthdr.rcvif = ifp;
2567 m->m_len = m->m_pkthdr.len = len;
2568 ss->ipackets++;
2569 eh = mtod(m, struct ether_header *);
2570 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2571 mxge_vlan_tag_remove(m, &csum);
2573 /* if the checksum is valid, mark it in the mbuf header */
2574 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2575 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2576 return;
2577 /* otherwise, it was a UDP frame, or a TCP frame which
2578 we could not do LRO on. Tell the stack that the
2579 checksum is good */
2580 m->m_pkthdr.csum_data = 0xffff;
2581 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2583 #if 0
2584 /* flowid only valid if RSS hashing is enabled */
2585 if (sc->num_slices > 1) {
2586 m->m_pkthdr.flowid = (ss - sc->ss);
2587 m->m_flags |= M_FLOWID;
2589 #endif
2590 /* pass the frame up the stack */
2591 (*ifp->if_input)(ifp, m);
2594 static inline void
2595 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2597 mxge_softc_t *sc;
2598 struct ifnet *ifp;
2599 struct ether_header *eh;
2600 struct mbuf *m;
2601 mxge_rx_ring_t *rx;
2602 bus_dmamap_t old_map;
2603 int idx;
2604 uint16_t tcpudp_csum;
2606 sc = ss->sc;
2607 ifp = sc->ifp;
2608 rx = &ss->rx_small;
2609 idx = rx->cnt & rx->mask;
2610 rx->cnt++;
2611 /* save a pointer to the received mbuf */
2612 m = rx->info[idx].m;
2613 /* try to replace the received mbuf */
2614 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2615 /* drop the frame -- the old mbuf is re-cycled */
2616 ifp->if_ierrors++;
2617 return;
2620 /* unmap the received buffer */
2621 old_map = rx->info[idx].map;
2622 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2623 bus_dmamap_unload(rx->dmat, old_map);
2625 /* swap the bus_dmamap_t's */
2626 rx->info[idx].map = rx->extra_map;
2627 rx->extra_map = old_map;
2629 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2630 * aligned */
2631 m->m_data += MXGEFW_PAD;
2633 m->m_pkthdr.rcvif = ifp;
2634 m->m_len = m->m_pkthdr.len = len;
2635 ss->ipackets++;
2636 eh = mtod(m, struct ether_header *);
2637 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2638 mxge_vlan_tag_remove(m, &csum);
2640 /* if the checksum is valid, mark it in the mbuf header */
2641 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2642 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2643 return;
2644 /* otherwise, it was a UDP frame, or a TCP frame which
2645 we could not do LRO on. Tell the stack that the
2646 checksum is good */
2647 m->m_pkthdr.csum_data = 0xffff;
2648 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2650 #if 0
2651 /* flowid only valid if RSS hashing is enabled */
2652 if (sc->num_slices > 1) {
2653 m->m_pkthdr.flowid = (ss - sc->ss);
2654 m->m_flags |= M_FLOWID;
2656 #endif
2657 /* pass the frame up the stack */
2658 (*ifp->if_input)(ifp, m);
2661 static inline void
2662 mxge_clean_rx_done(struct mxge_slice_state *ss)
2664 mxge_rx_done_t *rx_done = &ss->rx_done;
2665 int limit = 0;
2666 uint16_t length;
2667 uint16_t checksum;
2670 while (rx_done->entry[rx_done->idx].length != 0) {
2671 length = ntohs(rx_done->entry[rx_done->idx].length);
2672 rx_done->entry[rx_done->idx].length = 0;
2673 checksum = rx_done->entry[rx_done->idx].checksum;
2674 if (length <= (MHLEN - MXGEFW_PAD))
2675 mxge_rx_done_small(ss, length, checksum);
2676 else
2677 mxge_rx_done_big(ss, length, checksum);
2678 rx_done->cnt++;
2679 rx_done->idx = rx_done->cnt & rx_done->mask;
2681 /* limit potential for livelock */
2682 if (__predict_false(++limit > rx_done->mask / 2))
2683 break;
2685 #ifdef INET
2686 while (!SLIST_EMPTY(&ss->lro_active)) {
2687 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2688 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2689 mxge_lro_flush(ss, lro);
2691 #endif
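/*
 * The mask/2 limit above bounds how long this handler can run when
 * packets arrive as fast as they are drained.  Anything left over
 * stays in the ring with a nonzero length, so the completion loop in
 * mxge_intr() simply comes back around and drains the rest.
 */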
2695 static inline void
2696 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2698 struct ifnet *ifp;
2699 mxge_tx_ring_t *tx;
2700 struct mbuf *m;
2701 bus_dmamap_t map;
2702 int idx;
2703 int *flags;
2705 tx = &ss->tx;
2706 ifp = ss->sc->ifp;
2707 while (tx->pkt_done != mcp_idx) {
2708 idx = tx->done & tx->mask;
2709 tx->done++;
2710 m = tx->info[idx].m;
2711 /* mbuf and DMA map only attached to the first
2712 segment per-mbuf */
2713 if (m != NULL) {
2714 ss->obytes += m->m_pkthdr.len;
2715 if (m->m_flags & M_MCAST)
2716 ss->omcasts++;
2717 ss->opackets++;
2718 tx->info[idx].m = NULL;
2719 map = tx->info[idx].map;
2720 bus_dmamap_unload(tx->dmat, map);
2721 m_freem(m);
2723 if (tx->info[idx].flag) {
2724 tx->info[idx].flag = 0;
2725 tx->pkt_done++;
2729 /* If we have space, clear IFF_OACTIVE to tell the stack that
2730 it's OK to send packets */
2731 #ifdef IFNET_BUF_RING
2732 flags = &ss->if_flags;
2733 #else
2734 flags = &ifp->if_flags;
2735 #endif
2736 lwkt_serialize_enter(ifp->if_serializer);
2737 if ((*flags) & IFF_OACTIVE &&
2738 tx->req - tx->done < (tx->mask + 1)/4) {
2739 *(flags) &= ~IFF_OACTIVE;
2740 ss->tx.wake++;
2741 mxge_start_locked(ss);
2743 #ifdef IFNET_BUF_RING
2744 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2745 /* let the NIC stop polling this queue, since there
2746 * are no more transmits pending */
2747 if (tx->req == tx->done) {
2748 *tx->send_stop = 1;
2749 tx->queue_active = 0;
2750 tx->deactivate++;
2751 wmb();
2754 #endif
2755 lwkt_serialize_exit(ifp->if_serializer);
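/*
 * Bookkeeping note for the reclaim loop above: tx->done counts
 * descriptors while tx->pkt_done counts completed packets.  Only the
 * last descriptor of each frame had info[idx].flag set at submit time
 * (see mxge_encap), so pkt_done advances once per frame and is the
 * counter compared against the firmware's send_done_count.
 */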
2759 static struct mxge_media_type mxge_xfp_media_types[] =
2761 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2762 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2763 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2764 {0, (1 << 5), "10GBASE-ER"},
2765 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2766 {0, (1 << 3), "10GBASE-SW"},
2767 {0, (1 << 2), "10GBASE-LW"},
2768 {0, (1 << 1), "10GBASE-EW"},
2769 {0, (1 << 0), "Reserved"}
2771 static struct mxge_media_type mxge_sfp_media_types[] =
2773 {0, (1 << 7), "Reserved"},
2774 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2775 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2776 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2779 static void
2780 mxge_set_media(mxge_softc_t *sc, int type)
2782 sc->media_flags |= type;
2783 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2784 ifmedia_set(&sc->media, sc->media_flags);
2788 /*
2789 * Determine the media type for a NIC. Some XFPs will identify
2790 * themselves only when their link is up, so this is initiated via a
2791 * link up interrupt. However, this can potentially take up to
2792 * several milliseconds, so it is run via the watchdog routine, rather
2793 * than in the interrupt handler itself. This need only be done
2794 * once, not each time the link is up.
2795 */
2796 static void
2797 mxge_media_probe(mxge_softc_t *sc)
2799 mxge_cmd_t cmd;
2800 char *cage_type;
2801 char *ptr;
2802 struct mxge_media_type *mxge_media_types = NULL;
2803 int i, err, ms, mxge_media_type_entries;
2804 uint32_t byte;
2806 sc->need_media_probe = 0;
2808 /* if we've already set a media type, we're done */
2809 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2810 return;
2812 /*
2813 * parse the product code to determine the interface type
2814 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2815 * after the 3rd dash in the driver's cached copy of the
2816 * EEPROM's product code string.
2817 */
2818 ptr = sc->product_code_string;
2819 if (ptr == NULL) {
2820 device_printf(sc->dev, "Missing product code\n");
2823 for (i = 0; i < 3; i++, ptr++) {
2824 ptr = index(ptr, '-');
2825 if (ptr == NULL) {
2826 device_printf(sc->dev,
2827 "only %d dashes in PC?!?\n", i);
2828 return;
2831 if (*ptr == 'C') {
2832 /* -C is CX4 */
2833 mxge_set_media(sc, IFM_10G_CX4);
2834 return;
2836 else if (*ptr == 'Q') {
2837 /* -Q is Quad Ribbon Fiber */
2838 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2839 /* FreeBSD has no media type for Quad ribbon fiber */
2840 return;
2843 if (*ptr == 'R') {
2844 /* -R is XFP */
2845 mxge_media_types = mxge_xfp_media_types;
2846 mxge_media_type_entries =
2847 sizeof (mxge_xfp_media_types) /
2848 sizeof (mxge_xfp_media_types[0]);
2849 byte = MXGE_XFP_COMPLIANCE_BYTE;
2850 cage_type = "XFP";
2853 if (*ptr == 'S' || *(ptr + 1) == 'S') {
2854 /* -S or -2S is SFP+ */
2855 mxge_media_types = mxge_sfp_media_types;
2856 mxge_media_type_entries =
2857 sizeof (mxge_sfp_media_types) /
2858 sizeof (mxge_sfp_media_types[0]);
2859 cage_type = "SFP+";
2860 byte = 3;
2863 if (mxge_media_types == NULL) {
2864 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2865 return;
2868 /*
2869 * At this point we know the NIC has an XFP cage, so now we
2870 * try to determine what is in the cage by using the
2871 * firmware's XFP I2C commands to read the XFP 10GbE compliance
2872 * register. We read just one byte, which may take over
2873 * a millisecond.
2874 */
2876 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2877 cmd.data1 = byte;
2878 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2879 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2880 device_printf(sc->dev, "failed to read XFP\n");
2882 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2883 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2885 if (err != MXGEFW_CMD_OK) {
2886 return;
2889 /* now we wait for the data to be cached */
2890 cmd.data0 = byte;
2891 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2892 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2893 DELAY(1000);
2894 cmd.data0 = byte;
2895 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2897 if (err != MXGEFW_CMD_OK) {
2898 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2899 cage_type, err, ms);
2900 return;
2903 if (cmd.data0 == mxge_media_types[0].bitmask) {
2904 if (mxge_verbose)
2905 device_printf(sc->dev, "%s:%s\n", cage_type,
2906 mxge_media_types[0].name);
2907 mxge_set_media(sc, IFM_10G_CX4);
2908 return;
2910 for (i = 1; i < mxge_media_type_entries; i++) {
2911 if (cmd.data0 & mxge_media_types[i].bitmask) {
2912 if (mxge_verbose)
2913 device_printf(sc->dev, "%s:%s\n",
2914 cage_type,
2915 mxge_media_types[i].name);
2917 mxge_set_media(sc, mxge_media_types[i].flag);
2918 return;
2921 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2922 cmd.data0);
2924 return;
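/*
 * As a concrete example of the parse above (the part number is for
 * illustration only): a product code such as "10G-PCIE-8A-R" carries
 * its media character, 'R', right after the third dash, selecting the
 * XFP table and an I2C probe of the cage, while a "-C" part is
 * reported as CX4 immediately, with no I2C traffic at all.
 */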
2927 static void
2928 mxge_intr(void *arg)
2930 struct mxge_slice_state *ss = arg;
2931 mxge_softc_t *sc = ss->sc;
2932 mcp_irq_data_t *stats = ss->fw_stats;
2933 mxge_tx_ring_t *tx = &ss->tx;
2934 mxge_rx_done_t *rx_done = &ss->rx_done;
2935 uint32_t send_done_count;
2936 uint8_t valid;
2939 #ifndef IFNET_BUF_RING
2940 /* an interrupt on a non-zero slice is implicitly valid
2941 since MSI-X irqs are not shared */
2942 if (ss != sc->ss) {
2943 mxge_clean_rx_done(ss);
2944 *ss->irq_claim = be32toh(3);
2945 return;
2947 #endif
2949 /* make sure the DMA has finished */
2950 if (!stats->valid) {
2951 return;
2953 valid = stats->valid;
2955 if (sc->legacy_irq) {
2956 /* lower legacy IRQ */
2957 *sc->irq_deassert = 0;
2958 if (!mxge_deassert_wait)
2959 /* don't wait for conf. that irq is low */
2960 stats->valid = 0;
2961 } else {
2962 stats->valid = 0;
2965 /* loop while waiting for legacy irq deassertion */
2966 do {
2967 /* check for transmit completes and receives */
2968 send_done_count = be32toh(stats->send_done_count);
2969 while ((send_done_count != tx->pkt_done) ||
2970 (rx_done->entry[rx_done->idx].length != 0)) {
2971 if (send_done_count != tx->pkt_done)
2972 mxge_tx_done(ss, (int)send_done_count);
2973 mxge_clean_rx_done(ss);
2974 send_done_count = be32toh(stats->send_done_count);
2976 if (sc->legacy_irq && mxge_deassert_wait)
2977 wmb();
2978 } while (*((volatile uint8_t *) &stats->valid));
2980 /* fw link & error stats meaningful only on the first slice */
2981 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2982 if (sc->link_state != stats->link_up) {
2983 sc->link_state = stats->link_up;
2984 if (sc->link_state) {
2985 sc->ifp->if_link_state = LINK_STATE_UP;
2986 if_link_state_change(sc->ifp);
2987 if (mxge_verbose)
2988 device_printf(sc->dev, "link up\n");
2989 } else {
2990 sc->ifp->if_link_state = LINK_STATE_DOWN;
2991 if_link_state_change(sc->ifp);
2992 if (mxge_verbose)
2993 device_printf(sc->dev, "link down\n");
2995 sc->need_media_probe = 1;
2997 if (sc->rdma_tags_available !=
2998 be32toh(stats->rdma_tags_available)) {
2999 sc->rdma_tags_available =
3000 be32toh(stats->rdma_tags_available);
3001 device_printf(sc->dev, "RDMA timed out! %d tags "
3002 "left\n", sc->rdma_tags_available);
3005 if (stats->link_down) {
3006 sc->down_cnt += stats->link_down;
3007 sc->link_state = 0;
3008 sc->ifp->if_link_state = LINK_STATE_DOWN;
3009 if_link_state_change(sc->ifp);
3013 /* check to see if we have rx token to pass back */
3014 if (valid & 0x1)
3015 *ss->irq_claim = be32toh(3);
3016 *(ss->irq_claim + 1) = be32toh(3);
3019 static void
3020 mxge_init(void *arg)
3021 {
3022 }
3026 static void
3027 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3029 struct lro_entry *lro_entry;
3030 int i;
3032 while (!SLIST_EMPTY(&ss->lro_free)) {
3033 lro_entry = SLIST_FIRST(&ss->lro_free);
3034 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3035 kfree(lro_entry, M_DEVBUF);
3038 for (i = 0; i <= ss->rx_big.mask; i++) {
3039 if (ss->rx_big.info[i].m == NULL)
3040 continue;
3041 bus_dmamap_unload(ss->rx_big.dmat,
3042 ss->rx_big.info[i].map);
3043 m_freem(ss->rx_big.info[i].m);
3044 ss->rx_big.info[i].m = NULL;
3047 for (i = 0; i <= ss->rx_small.mask; i++) {
3048 if (ss->rx_small.info[i].m == NULL)
3049 continue;
3050 bus_dmamap_unload(ss->rx_small.dmat,
3051 ss->rx_small.info[i].map);
3052 m_freem(ss->rx_small.info[i].m);
3053 ss->rx_small.info[i].m = NULL;
3056 /* transmit ring used only on the first slice */
3057 if (ss->tx.info == NULL)
3058 return;
3060 for (i = 0; i <= ss->tx.mask; i++) {
3061 ss->tx.info[i].flag = 0;
3062 if (ss->tx.info[i].m == NULL)
3063 continue;
3064 bus_dmamap_unload(ss->tx.dmat,
3065 ss->tx.info[i].map);
3066 m_freem(ss->tx.info[i].m);
3067 ss->tx.info[i].m = NULL;
3071 static void
3072 mxge_free_mbufs(mxge_softc_t *sc)
3074 int slice;
3076 for (slice = 0; slice < sc->num_slices; slice++)
3077 mxge_free_slice_mbufs(&sc->ss[slice]);
3080 static void
3081 mxge_free_slice_rings(struct mxge_slice_state *ss)
3083 int i;
3086 if (ss->rx_done.entry != NULL)
3087 mxge_dma_free(&ss->rx_done.dma);
3088 ss->rx_done.entry = NULL;
3090 if (ss->tx.req_bytes != NULL)
3091 kfree(ss->tx.req_bytes, M_DEVBUF);
3092 ss->tx.req_bytes = NULL;
3094 if (ss->tx.seg_list != NULL)
3095 kfree(ss->tx.seg_list, M_DEVBUF);
3096 ss->tx.seg_list = NULL;
3098 if (ss->rx_small.shadow != NULL)
3099 kfree(ss->rx_small.shadow, M_DEVBUF);
3100 ss->rx_small.shadow = NULL;
3102 if (ss->rx_big.shadow != NULL)
3103 kfree(ss->rx_big.shadow, M_DEVBUF);
3104 ss->rx_big.shadow = NULL;
3106 if (ss->tx.info != NULL) {
3107 if (ss->tx.dmat != NULL) {
3108 for (i = 0; i <= ss->tx.mask; i++) {
3109 bus_dmamap_destroy(ss->tx.dmat,
3110 ss->tx.info[i].map);
3112 bus_dma_tag_destroy(ss->tx.dmat);
3114 kfree(ss->tx.info, M_DEVBUF);
3116 ss->tx.info = NULL;
3118 if (ss->rx_small.info != NULL) {
3119 if (ss->rx_small.dmat != NULL) {
3120 for (i = 0; i <= ss->rx_small.mask; i++) {
3121 bus_dmamap_destroy(ss->rx_small.dmat,
3122 ss->rx_small.info[i].map);
3124 bus_dmamap_destroy(ss->rx_small.dmat,
3125 ss->rx_small.extra_map);
3126 bus_dma_tag_destroy(ss->rx_small.dmat);
3128 kfree(ss->rx_small.info, M_DEVBUF);
3130 ss->rx_small.info = NULL;
3132 if (ss->rx_big.info != NULL) {
3133 if (ss->rx_big.dmat != NULL) {
3134 for (i = 0; i <= ss->rx_big.mask; i++) {
3135 bus_dmamap_destroy(ss->rx_big.dmat,
3136 ss->rx_big.info[i].map);
3138 bus_dmamap_destroy(ss->rx_big.dmat,
3139 ss->rx_big.extra_map);
3140 bus_dma_tag_destroy(ss->rx_big.dmat);
3142 kfree(ss->rx_big.info, M_DEVBUF);
3144 ss->rx_big.info = NULL;
3147 static void
3148 mxge_free_rings(mxge_softc_t *sc)
3150 int slice;
3152 for (slice = 0; slice < sc->num_slices; slice++)
3153 mxge_free_slice_rings(&sc->ss[slice]);
3156 static int
3157 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3158 int tx_ring_entries)
3160 mxge_softc_t *sc = ss->sc;
3161 size_t bytes;
3162 int err, i;
3164 err = ENOMEM;
3166 /* allocate per-slice receive resources */
3168 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3169 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3171 /* allocate the rx shadow rings */
3172 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3173 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3174 if (ss->rx_small.shadow == NULL)
3175 return err;
3177 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3178 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3179 if (ss->rx_big.shadow == NULL)
3180 return err;
3182 /* allocate the rx host info rings */
3183 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3184 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3185 if (ss->rx_small.info == NULL)
3186 return err;
3188 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3189 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3190 if (ss->rx_big.info == NULL)
3191 return err;
3193 /* allocate the rx busdma resources */
3194 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3195 1, /* alignment */
3196 4096, /* boundary */
3197 BUS_SPACE_MAXADDR, /* low */
3198 BUS_SPACE_MAXADDR, /* high */
3199 NULL, NULL, /* filter */
3200 MHLEN, /* maxsize */
3201 1, /* num segs */
3202 MHLEN, /* maxsegsize */
3203 BUS_DMA_ALLOCNOW, /* flags */
3204 &ss->rx_small.dmat); /* tag */
3205 if (err != 0) {
3206 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3207 err);
3208 return err;
3211 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3212 1, /* alignment */
3213 #if MXGE_VIRT_JUMBOS
3214 4096, /* boundary */
3215 #else
3216 0, /* boundary */
3217 #endif
3218 BUS_SPACE_MAXADDR, /* low */
3219 BUS_SPACE_MAXADDR, /* high */
3220 NULL, NULL, /* filter */
3221 3*4096, /* maxsize */
3222 #if MXGE_VIRT_JUMBOS
3223 3, /* num segs */
3224 4096, /* maxsegsize*/
3225 #else
3226 1, /* num segs */
3227 MJUM9BYTES, /* maxsegsize*/
3228 #endif
3229 BUS_DMA_ALLOCNOW, /* flags */
3230 &ss->rx_big.dmat); /* tag */
3231 if (err != 0) {
3232 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3233 err);
3234 return err;
3236 for (i = 0; i <= ss->rx_small.mask; i++) {
3237 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3238 &ss->rx_small.info[i].map);
3239 if (err != 0) {
3240 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3241 err);
3242 return err;
3245 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3246 &ss->rx_small.extra_map);
3247 if (err != 0) {
3248 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3249 err);
3250 return err;
3253 for (i = 0; i <= ss->rx_big.mask; i++) {
3254 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3255 &ss->rx_big.info[i].map);
3256 if (err != 0) {
3257 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3258 err);
3259 return err;
3262 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3263 &ss->rx_big.extra_map);
3264 if (err != 0) {
3265 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3266 err);
3267 return err;
3270 /* now allocate TX resources */
3272 #ifndef IFNET_BUF_RING
3273 /* only use a single TX ring for now */
3274 if (ss != ss->sc->ss)
3275 return 0;
3276 #endif
3278 ss->tx.mask = tx_ring_entries - 1;
3279 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3282 /* allocate the tx request copy block */
3283 bytes = 8 +
3284 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3285 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3286 if (ss->tx.req_bytes == NULL)
3287 return err;
3288 /* ensure req_list entries are aligned to 8 bytes */
3289 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3290 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3292 /* allocate the tx busdma segment list */
3293 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3294 ss->tx.seg_list = (bus_dma_segment_t *)
3295 kmalloc(bytes, M_DEVBUF, M_WAITOK);
3296 if (ss->tx.seg_list == NULL)
3297 return err;
3299 /* allocate the tx host info ring */
3300 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3301 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3302 if (ss->tx.info == NULL)
3303 return err;
3305 /* allocate the tx busdma resources */
3306 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3307 1, /* alignment */
3308 sc->tx_boundary, /* boundary */
3309 BUS_SPACE_MAXADDR, /* low */
3310 BUS_SPACE_MAXADDR, /* high */
3311 NULL, NULL, /* filter */
3312 65536 + 256, /* maxsize */
3313 ss->tx.max_desc - 2, /* num segs */
3314 sc->tx_boundary, /* maxsegsz */
3315 BUS_DMA_ALLOCNOW, /* flags */
3316 &ss->tx.dmat); /* tag */
3318 if (err != 0) {
3319 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3320 err);
3321 return err;
3324 /* now use these tags to setup dmamaps for each slot
3325 in the ring */
3326 for (i = 0; i <= ss->tx.mask; i++) {
3327 err = bus_dmamap_create(ss->tx.dmat, 0,
3328 &ss->tx.info[i].map);
3329 if (err != 0) {
3330 device_printf(sc->dev, "Err %d tx dmamap\n",
3331 err);
3332 return err;
3335 return 0;
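/*
 * Note on the req_list alignment above: the copy block is allocated
 * with 8 spare bytes and the pointer rounded up via the usual
 * (p + 7) & ~7 idiom, so that the request list the driver PIO-copies
 * to the NIC always starts on an 8-byte boundary regardless of what
 * kmalloc() returned.
 */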
3339 static int
3340 mxge_alloc_rings(mxge_softc_t *sc)
3342 mxge_cmd_t cmd;
3343 int tx_ring_size;
3344 int tx_ring_entries, rx_ring_entries;
3345 int err, slice;
3347 /* get ring sizes */
3348 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3349 tx_ring_size = cmd.data0;
3350 if (err != 0) {
3351 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3352 goto abort;
3355 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3356 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3357 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3358 ifq_set_ready(&sc->ifp->if_snd);
3360 for (slice = 0; slice < sc->num_slices; slice++) {
3361 err = mxge_alloc_slice_rings(&sc->ss[slice],
3362 rx_ring_entries,
3363 tx_ring_entries);
3364 if (err != 0)
3365 goto abort;
3367 return 0;
3369 abort:
3370 mxge_free_rings(sc);
3371 return err;
3376 static void
3377 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3379 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3381 if (bufsize < MCLBYTES) {
3382 /* easy, everything fits in a single buffer */
3383 *big_buf_size = MCLBYTES;
3384 *cl_size = MCLBYTES;
3385 *nbufs = 1;
3386 return;
3389 if (bufsize < MJUMPAGESIZE) {
3390 /* still easy, everything still fits in a single buffer */
3391 *big_buf_size = MJUMPAGESIZE;
3392 *cl_size = MJUMPAGESIZE;
3393 *nbufs = 1;
3394 return;
3396 #if MXGE_VIRT_JUMBOS
3397 /* now we need to use virtually contiguous buffers */
3398 *cl_size = MJUM9BYTES;
3399 *big_buf_size = 4096;
3400 *nbufs = mtu / 4096 + 1;
3401 /* needs to be a power of two, so round up */
3402 if (*nbufs == 3)
3403 *nbufs = 4;
3404 #else
3405 *cl_size = MJUM9BYTES;
3406 *big_buf_size = MJUM9BYTES;
3407 *nbufs = 1;
3408 #endif
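/*
 * Worked examples of the selection above (assuming the standard
 * cluster sizes): a 1500-byte MTU gives bufsize = 1500 + 14 + 4 + 2 =
 * 1520, which fits a plain 2K cluster.  A 9000-byte MTU gives 9020,
 * too big for both MCLBYTES and MJUMPAGESIZE, so a single 9K
 * (MJUM9BYTES) buffer is used -- or, under MXGE_VIRT_JUMBOS, page
 * sized chunks with nbufs = 9000/4096 + 1 = 3, rounded up to 4 so the
 * ring stride stays a power of two.
 */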
3411 static int
3412 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3414 mxge_softc_t *sc;
3415 mxge_cmd_t cmd;
3416 bus_dmamap_t map;
3417 struct lro_entry *lro_entry;
3418 int err, i, slice;
3421 sc = ss->sc;
3422 slice = ss - sc->ss;
3424 SLIST_INIT(&ss->lro_free);
3425 SLIST_INIT(&ss->lro_active);
3427 for (i = 0; i < sc->lro_cnt; i++) {
3428 lro_entry = (struct lro_entry *)
3429 kmalloc(sizeof (*lro_entry), M_DEVBUF,
3430 M_NOWAIT | M_ZERO);
3431 if (lro_entry == NULL) {
3432 sc->lro_cnt = i;
3433 break;
3435 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3437 /* get the lanai pointers to the send and receive rings */
3439 err = 0;
3440 #ifndef IFNET_BUF_RING
3441 /* We currently only send from the first slice */
3442 if (slice == 0) {
3443 #endif
3444 cmd.data0 = slice;
3445 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3446 ss->tx.lanai =
3447 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3448 ss->tx.send_go = (volatile uint32_t *)
3449 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3450 ss->tx.send_stop = (volatile uint32_t *)
3451 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3452 #ifndef IFNET_BUF_RING
3454 #endif
3455 cmd.data0 = slice;
3456 err |= mxge_send_cmd(sc,
3457 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3458 ss->rx_small.lanai =
3459 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3460 cmd.data0 = slice;
3461 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3462 ss->rx_big.lanai =
3463 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3465 if (err != 0) {
3466 device_printf(sc->dev,
3467 "failed to get ring sizes or locations\n");
3468 return EIO;
3471 /* stock receive rings */
3472 for (i = 0; i <= ss->rx_small.mask; i++) {
3473 map = ss->rx_small.info[i].map;
3474 err = mxge_get_buf_small(ss, map, i);
3475 if (err) {
3476 device_printf(sc->dev, "alloced %d/%d smalls\n",
3477 i, ss->rx_small.mask + 1);
3478 return ENOMEM;
3481 for (i = 0; i <= ss->rx_big.mask; i++) {
3482 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3483 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3485 ss->rx_big.nbufs = nbufs;
3486 ss->rx_big.cl_size = cl_size;
3487 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3488 EVL_ENCAPLEN + MXGEFW_PAD;
3489 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3490 map = ss->rx_big.info[i].map;
3491 err = mxge_get_buf_big(ss, map, i);
3492 if (err) {
3493 device_printf(sc->dev, "alloced %d/%d bigs\n",
3494 i, ss->rx_big.mask + 1);
3495 return ENOMEM;
3498 return 0;
3501 static int
3502 mxge_open(mxge_softc_t *sc)
3504 mxge_cmd_t cmd;
3505 int err, big_bytes, nbufs, slice, cl_size, i;
3506 bus_addr_t bus;
3507 volatile uint8_t *itable;
3508 struct mxge_slice_state *ss;
3510 /* Copy the MAC address in case it was overridden */
3511 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3513 err = mxge_reset(sc, 1);
3514 if (err != 0) {
3515 device_printf(sc->dev, "failed to reset\n");
3516 return EIO;
3519 if (sc->num_slices > 1) {
3520 /* setup the indirection table */
3521 cmd.data0 = sc->num_slices;
3522 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3523 &cmd);
3525 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3526 &cmd);
3527 if (err != 0) {
3528 device_printf(sc->dev,
3529 "failed to setup rss tables\n");
3530 return err;
3533 /* just enable an identity mapping */
3534 itable = sc->sram + cmd.data0;
3535 for (i = 0; i < sc->num_slices; i++)
3536 itable[i] = (uint8_t)i;
3538 cmd.data0 = 1;
3539 cmd.data1 = mxge_rss_hash_type;
3540 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3541 if (err != 0) {
3542 device_printf(sc->dev, "failed to enable slices\n");
3543 return err;
3548 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3550 cmd.data0 = nbufs;
3551 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3552 &cmd);
3553 /* error is only meaningful if we're trying to set
3554 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3555 if (err && nbufs > 1) {
3556 device_printf(sc->dev,
3557 "Failed to set alway-use-n to %d\n",
3558 nbufs);
3559 return EIO;
3561 /* Give the firmware the mtu and the big and small buffer
3562 sizes. The firmware wants the big buf size to be a power
3563 of two. Luckily, FreeBSD's clusters are powers of two */
3564 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3565 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3566 cmd.data0 = MHLEN - MXGEFW_PAD;
3567 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3568 &cmd);
3569 cmd.data0 = big_bytes;
3570 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3572 if (err != 0) {
3573 device_printf(sc->dev, "failed to setup params\n");
3574 goto abort;
3577 /* Now give him the pointer to the stats block */
3578 for (slice = 0;
3579 #ifdef IFNET_BUF_RING
3580 slice < sc->num_slices;
3581 #else
3582 slice < 1;
3583 #endif
3584 slice++) {
3585 ss = &sc->ss[slice];
3586 cmd.data0 =
3587 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3588 cmd.data1 =
3589 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3590 cmd.data2 = sizeof(struct mcp_irq_data);
3591 cmd.data2 |= (slice << 16);
3592 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3595 if (err != 0) {
3596 bus = sc->ss->fw_stats_dma.bus_addr;
3597 bus += offsetof(struct mcp_irq_data, send_done_count);
3598 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3599 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3600 err = mxge_send_cmd(sc,
3601 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3602 &cmd);
3603 /* Firmware cannot support multicast without STATS_DMA_V2 */
3604 sc->fw_multicast_support = 0;
3605 } else {
3606 sc->fw_multicast_support = 1;
3609 if (err != 0) {
3610 device_printf(sc->dev, "failed to setup params\n");
3611 goto abort;
3614 for (slice = 0; slice < sc->num_slices; slice++) {
3615 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3616 if (err != 0) {
3617 device_printf(sc->dev, "couldn't open slice %d\n",
3618 slice);
3619 goto abort;
3623 /* Finally, start the firmware running */
3624 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3625 if (err) {
3626 device_printf(sc->dev, "Couldn't bring up link\n");
3627 goto abort;
3629 #ifdef IFNET_BUF_RING
3630 for (slice = 0; slice < sc->num_slices; slice++) {
3631 ss = &sc->ss[slice];
3632 ss->if_flags |= IFF_RUNNING;
3633 ss->if_flags &= ~IFF_OACTIVE;
3635 #endif
3636 sc->ifp->if_flags |= IFF_RUNNING;
3637 sc->ifp->if_flags &= ~IFF_OACTIVE;
3638 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3640 return 0;
3643 abort:
3644 mxge_free_mbufs(sc);
3646 return err;
3649 static int
3650 mxge_close(mxge_softc_t *sc)
3652 mxge_cmd_t cmd;
3653 int err, old_down_cnt;
3654 #ifdef IFNET_BUF_RING
3655 struct mxge_slice_state *ss;
3656 int slice;
3657 #endif
3659 callout_stop(&sc->co_hdl);
3660 #ifdef IFNET_BUF_RING
3661 for (slice = 0; slice < sc->num_slices; slice++) {
3662 ss = &sc->ss[slice];
3663 ss->if_flags &= ~IFF_RUNNING;
3665 #endif
3666 sc->ifp->if_flags &= ~IFF_RUNNING;
3667 old_down_cnt = sc->down_cnt;
3668 wmb();
3669 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3670 if (err) {
3671 device_printf(sc->dev, "Couldn't bring down link\n");
3673 if (old_down_cnt == sc->down_cnt) {
3674 /* wait for down irq */
3675 DELAY(10 * sc->intr_coal_delay);
3677 wmb();
3678 if (old_down_cnt == sc->down_cnt) {
3679 device_printf(sc->dev, "never got down irq\n");
3682 mxge_free_mbufs(sc);
3684 return 0;
3687 static void
3688 mxge_setup_cfg_space(mxge_softc_t *sc)
3690 device_t dev = sc->dev;
3691 int reg;
3692 uint16_t cmd, lnk, pectl;
3694 /* find the PCIe link width and set max read request to 4KB */
3695 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3696 lnk = pci_read_config(dev, reg + 0x12, 2);
3697 sc->link_width = (lnk >> 4) & 0x3f;
3699 pectl = pci_read_config(dev, reg + 0x8, 2);
3700 pectl = (pectl & ~0x7000) | (5 << 12);
3701 pci_write_config(dev, reg + 0x8, pectl, 2);
3704 /* Enable DMA and Memory space access */
3705 pci_enable_busmaster(dev);
3706 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3707 cmd |= PCIM_CMD_MEMEN;
3708 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3711 static uint32_t
3712 mxge_read_reboot(mxge_softc_t *sc)
3714 device_t dev = sc->dev;
3715 uint32_t vs;
3717 /* find the vendor specific offset */
3718 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3719 device_printf(sc->dev,
3720 "could not find vendor specific offset\n");
3721 return (uint32_t)-1;
3723 /* enable read32 mode */
3724 pci_write_config(dev, vs + 0x10, 0x3, 1);
3725 /* tell NIC which register to read */
3726 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3727 return (pci_read_config(dev, vs + 0x14, 4));
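/*
 * The vendor-specific capability above acts as a small indirect
 * access window: offset +0x10 selects the access mode (0x3 = read32),
 * +0x18 takes the target address, and +0x14 returns the data.  The
 * address 0xfffffff0 is where the firmware exposes its reboot status
 * word, which mxge_watchdog_reset() prints after a crash.
 */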
3730 static int
3731 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3733 struct pci_devinfo *dinfo;
3734 mxge_tx_ring_t *tx;
3735 int err;
3736 uint32_t reboot;
3737 uint16_t cmd;
3739 err = ENXIO;
3741 device_printf(sc->dev, "Watchdog reset!\n");
3743 /*
3744 * check to see if the NIC rebooted. If it did, then all of
3745 * PCI config space has been reset, and things like the
3746 * busmaster bit will be zero. If this is the case, then we
3747 * must restore PCI config space before the NIC can be used
3748 * again.
3749 */
3750 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3751 if (cmd == 0xffff) {
3752 /*
3753 * maybe the watchdog caught the NIC rebooting; wait
3754 * up to 100ms for it to finish. If it does not come
3755 * back, then give up.
3756 */
3757 DELAY(1000*100);
3758 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3759 if (cmd == 0xffff) {
3760 device_printf(sc->dev, "NIC disappeared!\n");
3761 return (err);
3764 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3765 /* print the reboot status */
3766 reboot = mxge_read_reboot(sc);
3767 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3768 reboot);
3769 /* restore PCI configuration space */
3770 dinfo = device_get_ivars(sc->dev);
3771 pci_cfg_restore(sc->dev, dinfo);
3773 /* and redo any changes we made to our config space */
3774 mxge_setup_cfg_space(sc);
3776 if (sc->ifp->if_flags & IFF_RUNNING) {
3777 mxge_close(sc);
3778 err = mxge_open(sc);
3780 } else {
3781 tx = &sc->ss[slice].tx;
3782 device_printf(sc->dev,
3783 "NIC did not reboot, slice %d ring state:\n",
3784 slice);
3785 device_printf(sc->dev,
3786 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3787 tx->req, tx->done, tx->queue_active);
3788 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3789 tx->activate, tx->deactivate);
3790 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3791 tx->pkt_done,
3792 be32toh(sc->ss->fw_stats->send_done_count));
3793 device_printf(sc->dev, "not resetting\n");
3795 return (err);
3798 static int
3799 mxge_watchdog(mxge_softc_t *sc)
3801 mxge_tx_ring_t *tx;
3802 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3803 int i, err = 0;
3805 /* see if we have outstanding transmits, which
3806 have been pending for more than mxge_ticks */
3807 for (i = 0;
3808 #ifdef IFNET_BUF_RING
3809 (i < sc->num_slices) && (err == 0);
3810 #else
3811 (i < 1) && (err == 0);
3812 #endif
3813 i++) {
3814 tx = &sc->ss[i].tx;
3815 if (tx->req != tx->done &&
3816 tx->watchdog_req != tx->watchdog_done &&
3817 tx->done == tx->watchdog_done) {
3818 /* check for pause blocking before resetting */
3819 if (tx->watchdog_rx_pause == rx_pause)
3820 err = mxge_watchdog_reset(sc, i);
3821 else
3822 device_printf(sc->dev, "Flow control blocking "
3823 "xmits, check link partner\n");
3826 tx->watchdog_req = tx->req;
3827 tx->watchdog_done = tx->done;
3828 tx->watchdog_rx_pause = rx_pause;
3831 if (sc->need_media_probe)
3832 mxge_media_probe(sc);
3833 return (err);
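/*
 * The stall test above requires three conditions over two consecutive
 * ticks: transmits are outstanding now (req != done), transmits were
 * already outstanding at the previous tick (watchdog_req !=
 * watchdog_done), and nothing completed in between (done ==
 * watchdog_done).  Even then the reset is skipped if the pause-frame
 * counter moved, since a link partner asserting flow control can
 * legitimately stall the ring for a long time.
 */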
3836 static void
3837 mxge_update_stats(mxge_softc_t *sc)
3839 struct mxge_slice_state *ss;
3840 u_long ipackets = 0;
3841 u_long opackets = 0;
3842 #ifdef IFNET_BUF_RING
3843 u_long obytes = 0;
3844 u_long omcasts = 0;
3845 u_long odrops = 0;
3846 #endif
3847 u_long oerrors = 0;
3848 int slice;
3850 for (slice = 0; slice < sc->num_slices; slice++) {
3851 ss = &sc->ss[slice];
3852 ipackets += ss->ipackets;
3853 opackets += ss->opackets;
3854 #ifdef IFNET_BUF_RING
3855 obytes += ss->obytes;
3856 omcasts += ss->omcasts;
3857 odrops += ss->tx.br->br_drops;
3858 #endif
3859 oerrors += ss->oerrors;
3861 sc->ifp->if_ipackets = ipackets;
3862 sc->ifp->if_opackets = opackets;
3863 #ifdef IFNET_BUF_RING
3864 sc->ifp->if_obytes = obytes;
3865 sc->ifp->if_omcasts = omcasts;
3866 sc->ifp->if_snd.ifq_drops = odrops;
3867 #endif
3868 sc->ifp->if_oerrors = oerrors;
3871 static void
3872 mxge_tick(void *arg)
3874 mxge_softc_t *sc = arg;
3875 int err = 0;
3877 lwkt_serialize_enter(sc->ifp->if_serializer);
3878 /* aggregate stats from different slices */
3879 mxge_update_stats(sc);
3880 if (!sc->watchdog_countdown) {
3881 err = mxge_watchdog(sc);
3882 sc->watchdog_countdown = 4;
3884 sc->watchdog_countdown--;
3885 if (err == 0)
3886 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3887 lwkt_serialize_exit(sc->ifp->if_serializer);
3890 static int
3891 mxge_media_change(struct ifnet *ifp)
3893 return EINVAL;
3896 static int
3897 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3899 struct ifnet *ifp = sc->ifp;
3900 int real_mtu, old_mtu;
3901 int err = 0;
3904 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3905 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3906 return EINVAL;
3907 lwkt_serialize_enter(ifp->if_serializer);
3908 old_mtu = ifp->if_mtu;
3909 ifp->if_mtu = mtu;
3910 if (ifp->if_flags & IFF_RUNNING) {
3911 mxge_close(sc);
3912 err = mxge_open(sc);
3913 if (err != 0) {
3914 ifp->if_mtu = old_mtu;
3915 mxge_close(sc);
3916 (void) mxge_open(sc);
3919 lwkt_serialize_exit(ifp->if_serializer);
3920 return err;
3923 static void
3924 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3926 mxge_softc_t *sc = ifp->if_softc;
3929 if (sc == NULL)
3930 return;
3931 ifmr->ifm_status = IFM_AVALID;
3932 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3933 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3934 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3937 static int
3938 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3940 mxge_softc_t *sc = ifp->if_softc;
3941 struct ifreq *ifr = (struct ifreq *)data;
3942 int err, mask;
3944 (void)cr;
3945 err = 0;
3946 switch (command) {
3947 case SIOCSIFADDR:
3948 case SIOCGIFADDR:
3949 err = ether_ioctl(ifp, command, data);
3950 break;
3952 case SIOCSIFMTU:
3953 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3954 break;
3956 case SIOCSIFFLAGS:
3957 lwkt_serialize_enter(sc->ifp->if_serializer);
3958 if (sc->dying) {
3959 lwkt_serialize_exit(ifp->if_serializer);
3960 return EINVAL;
3962 if (ifp->if_flags & IFF_UP) {
3963 if (!(ifp->if_flags & IFF_RUNNING)) {
3964 err = mxge_open(sc);
3965 } else {
3966 /* take care of promisc and allmulti
3967 flag changes */
3968 mxge_change_promisc(sc,
3969 ifp->if_flags & IFF_PROMISC);
3970 mxge_set_multicast_list(sc);
3972 } else {
3973 if (ifp->if_flags & IFF_RUNNING) {
3974 mxge_close(sc);
3977 lwkt_serialize_exit(ifp->if_serializer);
3978 break;
3980 case SIOCADDMULTI:
3981 case SIOCDELMULTI:
3982 lwkt_serialize_enter(sc->ifp->if_serializer);
3983 mxge_set_multicast_list(sc);
3984 lwkt_serialize_exit(sc->ifp->if_serializer);
3985 break;
3987 case SIOCSIFCAP:
3988 lwkt_serialize_enter(sc->ifp->if_serializer);
3989 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3990 if (mask & IFCAP_TXCSUM) {
3991 if (IFCAP_TXCSUM & ifp->if_capenable) {
3992 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
3993 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
3994 | CSUM_TSO);
3995 } else {
3996 ifp->if_capenable |= IFCAP_TXCSUM;
3997 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
3999 } else if (mask & IFCAP_RXCSUM) {
4000 if (IFCAP_RXCSUM & ifp->if_capenable) {
4001 ifp->if_capenable &= ~IFCAP_RXCSUM;
4002 sc->csum_flag = 0;
4003 } else {
4004 ifp->if_capenable |= IFCAP_RXCSUM;
4005 sc->csum_flag = 1;
4008 if (mask & IFCAP_TSO4) {
4009 if (IFCAP_TSO4 & ifp->if_capenable) {
4010 ifp->if_capenable &= ~IFCAP_TSO4;
4011 ifp->if_hwassist &= ~CSUM_TSO;
4012 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4013 ifp->if_capenable |= IFCAP_TSO4;
4014 ifp->if_hwassist |= CSUM_TSO;
4015 } else {
4016 kprintf("mxge requires tx checksum offload"
4017 " be enabled to use TSO\n");
4018 err = EINVAL;
4021 if (mask & IFCAP_LRO) {
4022 if (IFCAP_LRO & ifp->if_capenable)
4023 err = mxge_change_lro_locked(sc, 0);
4024 else
4025 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4027 if (mask & IFCAP_VLAN_HWTAGGING)
4028 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4029 lwkt_serialize_exit(sc->ifp->if_serializer);
4030 VLAN_CAPABILITIES(ifp);
4032 break;
4034 case SIOCGIFMEDIA:
4035 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4036 &sc->media, command);
4037 break;
4039 default:
4040 err = ENOTTY;
4042 return err;
4045 static void
4046 mxge_fetch_tunables(mxge_softc_t *sc)
4049 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4050 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4051 &mxge_flow_control);
4052 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4053 &mxge_intr_coal_delay);
4054 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4055 &mxge_nvidia_ecrc_enable);
4056 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4057 &mxge_force_firmware);
4058 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4059 &mxge_deassert_wait);
4060 TUNABLE_INT_FETCH("hw.mxge.verbose",
4061 &mxge_verbose);
4062 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4063 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4064 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4065 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4066 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4067 if (sc->lro_cnt != 0)
4068 mxge_lro_cnt = sc->lro_cnt;
4070 if (bootverbose)
4071 mxge_verbose = 1;
4072 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4073 mxge_intr_coal_delay = 30;
4074 if (mxge_ticks == 0)
4075 mxge_ticks = hz / 2;
4076 sc->pause = mxge_flow_control;
4077 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4078 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4079 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4081 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4082 mxge_initial_mtu < ETHER_MIN_LEN)
4083 mxge_initial_mtu = ETHERMTU_JUMBO;
4087 static void
4088 mxge_free_slices(mxge_softc_t *sc)
4090 struct mxge_slice_state *ss;
4091 int i;
4094 if (sc->ss == NULL)
4095 return;
4097 for (i = 0; i < sc->num_slices; i++) {
4098 ss = &sc->ss[i];
4099 if (ss->fw_stats != NULL) {
4100 mxge_dma_free(&ss->fw_stats_dma);
4101 ss->fw_stats = NULL;
4102 #ifdef IFNET_BUF_RING
4103 if (ss->tx.br != NULL) {
4104 drbr_free(ss->tx.br, M_DEVBUF);
4105 ss->tx.br = NULL;
4107 #endif
4109 if (ss->rx_done.entry != NULL) {
4110 mxge_dma_free(&ss->rx_done.dma);
4111 ss->rx_done.entry = NULL;
4114 kfree(sc->ss, M_DEVBUF);
4115 sc->ss = NULL;
4118 static int
4119 mxge_alloc_slices(mxge_softc_t *sc)
4121 mxge_cmd_t cmd;
4122 struct mxge_slice_state *ss;
4123 size_t bytes;
4124 int err, i, max_intr_slots;
4126 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4127 if (err != 0) {
4128 device_printf(sc->dev, "Cannot determine rx ring size\n");
4129 return err;
4131 sc->rx_ring_size = cmd.data0;
4132 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4134 bytes = sizeof (*sc->ss) * sc->num_slices;
4135 sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4136 if (sc->ss == NULL)
4137 return (ENOMEM);
4138 for (i = 0; i < sc->num_slices; i++) {
4139 ss = &sc->ss[i];
4141 ss->sc = sc;
4143 /* allocate per-slice rx interrupt queues */
4145 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4146 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4147 if (err != 0)
4148 goto abort;
4149 ss->rx_done.entry = ss->rx_done.dma.addr;
4150 bzero(ss->rx_done.entry, bytes);
4152 /*
4153 * allocate the per-slice firmware stats; stats
4154 * (including tx) are used only on the first
4155 * slice for now
4156 */
4157 #ifndef IFNET_BUF_RING
4158 if (i > 0)
4159 continue;
4160 #endif
4162 bytes = sizeof (*ss->fw_stats);
4163 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4164 sizeof (*ss->fw_stats), 64);
4165 if (err != 0)
4166 goto abort;
4167 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4168 #ifdef IFNET_BUF_RING
4169 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4170 &ss->tx.lock);
4171 #endif
4174 return (0);
4176 abort:
4177 mxge_free_slices(sc);
4178 return (ENOMEM);
4181 static void
4182 mxge_slice_probe(mxge_softc_t *sc)
4184 mxge_cmd_t cmd;
4185 char *old_fw;
4186 int msix_cnt, status, max_intr_slots;
4188 sc->num_slices = 1;
4189 /*
4190 * don't enable multiple slices if they are disabled via the
4191 * mxge_max_slices tunable, or if this is not an SMP system
4192 */
4194 if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2)
4195 return;
4197 /* see how many MSI-X interrupts are available */
4198 msix_cnt = pci_msix_count(sc->dev);
4199 if (msix_cnt < 2)
4200 return;
4202 /* now load the slice aware firmware to see what it supports */
4203 old_fw = sc->fw_name;
4204 if (old_fw == mxge_fw_aligned)
4205 sc->fw_name = mxge_fw_rss_aligned;
4206 else
4207 sc->fw_name = mxge_fw_rss_unaligned;
4208 status = mxge_load_firmware(sc, 0);
4209 if (status != 0) {
4210 device_printf(sc->dev, "Falling back to a single slice\n");
4211 return;
4214 /* try to send a reset command to the card to see if it
4215 is alive */
4216 memset(&cmd, 0, sizeof (cmd));
4217 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4218 if (status != 0) {
4219 device_printf(sc->dev, "failed reset\n");
4220 goto abort_with_fw;
4223 /* get rx ring size */
4224 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4225 if (status != 0) {
4226 device_printf(sc->dev, "Cannot determine rx ring size\n");
4227 goto abort_with_fw;
4229 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4231 /* tell it the size of the interrupt queues */
4232 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4233 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4234 if (status != 0) {
4235 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4236 goto abort_with_fw;
4239 /* ask the maximum number of slices it supports */
4240 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4241 if (status != 0) {
4242 device_printf(sc->dev,
4243 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4244 goto abort_with_fw;
4246 sc->num_slices = cmd.data0;
4247 if (sc->num_slices > msix_cnt)
4248 sc->num_slices = msix_cnt;
4250 if (mxge_max_slices == -1) {
4251 /* cap to number of CPUs in system */
4252 if (sc->num_slices > ncpus)
4253 sc->num_slices = ncpus;
4254 } else {
4255 if (sc->num_slices > mxge_max_slices)
4256 sc->num_slices = mxge_max_slices;
4258 /* make sure it is a power of two */
4259 while (sc->num_slices & (sc->num_slices - 1))
4260 sc->num_slices--;
4262 if (mxge_verbose)
4263 device_printf(sc->dev, "using %d slices\n",
4264 sc->num_slices);
4266 return;
4268 abort_with_fw:
4269 sc->fw_name = old_fw;
4270 (void) mxge_load_firmware(sc, 0);
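
/*
 * Allocate one MSI-X vector per slice and hook each one up to
 * mxge_intr with its slice state as the argument.  The MSI-X table
 * lives in BAR 2, which must be mapped before the vectors can be
 * allocated.
 */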
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);
	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}

	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		/* M_NOWAIT allocations can fail; don't dereference NULL */
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_MPSAFE,
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i],
				     sc->ifp->if_serializer);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			kprintf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		kprintf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
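
/*
 * Interrupt setup for the single-slice case: prefer a plain MSI
 * message if the device offers exactly one, otherwise fall back to a
 * shared legacy INTx line.
 */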
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_MPSAFE,
			     mxge_intr, &sc->ss[0], &sc->ih,
			     sc->ifp->if_serializer);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
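
/*
 * Undo mxge_add_msix_irqs: tear down the handlers, release the IRQ
 * resources and the BAR 2 mapping, and give back the MSI-X vectors.
 */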
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
}

static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}
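
/*
 * Interrupt setup dispatcher: MSI-X when running with multiple
 * slices, otherwise a single MSI/INTx interrupt.
 */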
static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);
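
	/*
	 * The "if (0 && ...)" below is intentionally dead code: it
	 * would tear down and re-add the MSI-X vectors, but the
	 * condition can never be true, so it is kept for reference
	 * only.
	 */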
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
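
/*
 * Device attach: create the parent DMA tag, map the board, read the
 * MAC address from the EEPROM strings, load and reset the firmware,
 * allocate slices, rings and interrupts, and finally publish the
 * interface to the network stack.
 */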
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid;

	/*
	 * avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	mxge_fetch_tunables(sc);
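
	/*
	 * Parent DMA tag for all of the driver's DMA allocations:
	 * byte-aligned, anywhere in the bus address space, and large
	 * enough for a maximal TSO transmit (64KB plus header room).
	 */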
	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 &sc->parent_dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_parent_dmat;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/*
	 * make NULL terminated copy of the EEPROM strings section of
	 * lanai SRAM
	 */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		goto abort_with_slices;
	}

	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}
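
	/*
	 * Advertise hardware offloads: rx/tx checksums, TSO, LRO
	 * (when INET is configured), hardware VLAN tagging (with the
	 * new VLAN API) and, firmware permitting, 9000-byte jumbo
	 * frames.
	 */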
	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000) {
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	} else {
		device_printf(dev, "MTU limited to %d. Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	}
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;

	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr, NULL);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU)
		mxge_change_mtu(sc, mxge_initial_mtu);

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	return 0;

abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_nothing:
	return err;
}
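
/*
 * Device detach: mark the interface dying, close it if it is
 * running, then release everything mxge_attach set up, in reverse
 * order.
 */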
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	lwkt_serialize_enter(sc->ifp->if_serializer);
	sc->dying = 1;
	if (sc->ifp->if_flags & IFF_RUNNING)
		mxge_close(sc);
	/*
	 * XXX: race: the callout callback could be spinning on
	 * the serializer and run anyway
	 */
	callout_stop(&sc->co_hdl);
	lwkt_serialize_exit(sc->ifp->if_serializer);

	ether_ifdetach(sc->ifp);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/