sys/dev/netif/mxge/if_mxge.c (dragonfly.git)
1 /******************************************************************************
3 Copyright (c) 2006-2009, Myricom Inc.
4 All rights reserved.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 $FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $
30 ***************************************************************************/
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/linker.h>
35 #include <sys/firmware.h>
36 #include <sys/endian.h>
37 #include <sys/in_cksum.h>
38 #include <sys/sockio.h>
39 #include <sys/mbuf.h>
40 #include <sys/malloc.h>
41 #include <sys/kernel.h>
42 #include <sys/module.h>
43 #include <sys/serialize.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
47 /* count xmits ourselves, rather than via drbr */
48 #define NO_SLOW_STATS
49 #include <net/if.h>
50 #include <net/if_arp.h>
51 #include <net/ifq_var.h>
52 #include <net/ethernet.h>
53 #include <net/if_dl.h>
54 #include <net/if_media.h>
56 #include <net/bpf.h>
58 #include <net/if_types.h>
59 #include <net/vlan/if_vlan_var.h>
60 #include <net/zlib.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/in.h>
64 #include <netinet/ip.h>
65 #include <netinet/tcp.h>
67 #include <sys/bus.h>
68 #include <sys/rman.h>
70 #include <bus/pci/pcireg.h>
71 #include <bus/pci/pcivar.h>
72 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */
74 #include <vm/vm.h> /* for pmap_mapdev() */
75 #include <vm/pmap.h>
77 #if defined(__i386) || defined(__x86_64)
78 #include <machine/specialreg.h>
79 #endif
81 #include <dev/netif/mxge/mxge_mcp.h>
82 #include <dev/netif/mxge/mcp_gen_header.h>
83 /*#define MXGE_FAKE_IFP*/
84 #include <dev/netif/mxge/if_mxge_var.h>
85 #ifdef IFNET_BUF_RING
86 #include <sys/buf_ring.h>
87 #endif
89 #include "opt_inet.h"
91 /* tunable params */
92 static int mxge_nvidia_ecrc_enable = 1;
93 static int mxge_force_firmware = 0;
94 static int mxge_intr_coal_delay = 30;
95 static int mxge_deassert_wait = 1;
96 static int mxge_flow_control = 1;
97 static int mxge_verbose = 0;
98 static int mxge_lro_cnt = 8;
99 static int mxge_ticks;
100 static int mxge_max_slices = 1;
101 static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
102 static int mxge_always_promisc = 0;
103 /* XXX: not yet */
104 /* static int mxge_initial_mtu = ETHERMTU_JUMBO; */
105 static int mxge_initial_mtu = ETHERMTU;
106 static char *mxge_fw_unaligned = "mxge_ethp_z8e";
107 static char *mxge_fw_aligned = "mxge_eth_z8e";
108 static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
109 static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
111 static int mxge_probe(device_t dev);
112 static int mxge_attach(device_t dev);
113 static int mxge_detach(device_t dev);
114 static int mxge_shutdown(device_t dev);
115 static void mxge_intr(void *arg);
117 static device_method_t mxge_methods[] =
119 /* Device interface */
120 DEVMETHOD(device_probe, mxge_probe),
121 DEVMETHOD(device_attach, mxge_attach),
122 DEVMETHOD(device_detach, mxge_detach),
123 DEVMETHOD(device_shutdown, mxge_shutdown),
124 {0, 0}
127 static driver_t mxge_driver =
129 "mxge",
130 mxge_methods,
131 sizeof(mxge_softc_t),
134 static devclass_t mxge_devclass;
136 /* Declare ourselves to be a child of the PCI bus.*/
137 DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
138 MODULE_DEPEND(mxge, firmware, 1, 1, 1);
139 MODULE_DEPEND(mxge, zlib, 1, 1, 1);
141 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
142 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
143 static int mxge_close(mxge_softc_t *sc);
144 static int mxge_open(mxge_softc_t *sc);
145 static void mxge_tick(void *arg);
147 /* XXX: we don't have Large Receive Offload support yet */
148 inline int
149 mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
151 (void)ss;
152 (void)m_head;
153 (void)csum;
154 return 1;
157 inline void
158 mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
160 (void)ss;
161 (void)lro;
164 static int
165 mxge_probe(device_t dev)
167 int rev;
170 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
171 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
172 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
173 rev = pci_get_revid(dev);
174 switch (rev) {
175 case MXGE_PCI_REV_Z8E:
176 device_set_desc(dev, "Myri10G-PCIE-8A");
177 break;
178 case MXGE_PCI_REV_Z8ES:
179 device_set_desc(dev, "Myri10G-PCIE-8B");
180 break;
181 default:
182 device_set_desc(dev, "Myri10G-PCIE-8??");
183 device_printf(dev, "Unrecognized rev %d NIC\n",
184 rev);
185 break;
187 return 0;
189 return ENXIO;
192 static void
193 mxge_enable_wc(mxge_softc_t *sc)
195 #if 0
196 #if defined(__i386) || defined(__x86_64)
197 vm_offset_t len;
198 int err;
200 sc->wc = 1;
201 len = rman_get_size(sc->mem_res);
202 err = pmap_change_attr((vm_offset_t) sc->sram,
203 len, PAT_WRITE_COMBINING);
204 if (err != 0) {
205 device_printf(sc->dev, "pmap_change_attr failed, %d\n",
206 err);
207 sc->wc = 0;
209 #endif
210 #else
211 sc->wc = 0; /* TBD: PAT support */
212 #endif
216 /* callback to get our DMA address */
217 static void
218 mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
219 int error)
221 if (error == 0) {
222 *(bus_addr_t *) arg = segs->ds_addr;
226 static int
227 mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
228 bus_size_t alignment)
230 int err;
231 device_t dev = sc->dev;
232 bus_size_t boundary, maxsegsize;
234 if (bytes > 4096 && alignment == 4096) {
235 boundary = 0;
236 maxsegsize = bytes;
237 } else {
238 boundary = 4096;
239 maxsegsize = 4096;
242 /* allocate DMAable memory tags */
243 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
244 alignment, /* alignment */
245 boundary, /* boundary */
246 BUS_SPACE_MAXADDR, /* low */
247 BUS_SPACE_MAXADDR, /* high */
248 NULL, NULL, /* filter */
249 bytes, /* maxsize */
250 1, /* num segs */
251 maxsegsize, /* maxsegsize */
252 BUS_DMA_COHERENT, /* flags */
253 &dma->dmat); /* tag */
254 if (err != 0) {
255 device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
256 return err;
259 /* allocate DMAable memory & map */
260 err = bus_dmamem_alloc(dma->dmat, &dma->addr,
261 (BUS_DMA_WAITOK | BUS_DMA_COHERENT
262 | BUS_DMA_ZERO), &dma->map);
263 if (err != 0) {
264 device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
265 goto abort_with_dmat;
268 /* load the memory */
269 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
270 mxge_dmamap_callback,
271 (void *)&dma->bus_addr, 0);
272 if (err != 0) {
273 device_printf(dev, "couldn't load map (err = %d)\n", err);
274 goto abort_with_mem;
276 return 0;
278 abort_with_mem:
279 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
280 abort_with_dmat:
281 (void)bus_dma_tag_destroy(dma->dmat);
282 return err;
286 static void
287 mxge_dma_free(mxge_dma_t *dma)
289 bus_dmamap_unload(dma->dmat, dma->map);
290 bus_dmamem_free(dma->dmat, dma->addr, dma->map);
291 (void)bus_dma_tag_destroy(dma->dmat);
295 * The eeprom strings on the lanaiX have the format
296 * SN=x\0
297 * MAC=x:x:x:x:x:x\0
298 * PC=text\0
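* A hypothetical example image (values illustrative only):
*   "SN=123456\0MAC=00:60:dd:47:ab:cd\0PC=M10GE-8A-R\0\0"
* i.e. NUL-separated KEY=value strings, ending with an empty string.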
301 static int
302 mxge_parse_strings(mxge_softc_t *sc)
304 #define MXGE_NEXT_STRING(p) while (ptr < limit && *ptr++)
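/* Note: the macro above ignores its argument and instead advances the
 * local "ptr" just past the terminating NUL of the current string
 * (stopping at "limit"); the loop below uses it to step through the
 * NUL-separated strings. */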
306 char *ptr, *limit;
307 int i, found_mac;
309 ptr = sc->eeprom_strings;
310 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
311 found_mac = 0;
312 while (ptr < limit && *ptr != '\0') {
313 if (memcmp(ptr, "MAC=", 4) == 0) {
314 ptr += 1;
315 sc->mac_addr_string = ptr;
316 for (i = 0; i < 6; i++) {
317 ptr += 3;
318 if ((ptr + 2) > limit)
319 goto abort;
320 sc->mac_addr[i] = strtoul(ptr, NULL, 16);
321 found_mac = 1;
323 } else if (memcmp(ptr, "PC=", 3) == 0) {
324 ptr += 3;
325 strncpy(sc->product_code_string, ptr,
326 sizeof (sc->product_code_string) - 1);
327 } else if (memcmp(ptr, "SN=", 3) == 0) {
328 ptr += 3;
329 strncpy(sc->serial_number_string, ptr,
330 sizeof (sc->serial_number_string) - 1);
332 MXGE_NEXT_STRING(ptr);
335 if (found_mac)
336 return 0;
338 abort:
339 device_printf(sc->dev, "failed to parse eeprom_strings\n");
341 return ENXIO;
344 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
345 static void
346 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
348 uint32_t val;
349 unsigned long base, off;
350 char *va, *cfgptr;
351 device_t pdev, mcp55;
352 uint16_t vendor_id, device_id, word;
353 uintptr_t bus, slot, func, ivend, idev;
354 uint32_t *ptr32;
357 if (!mxge_nvidia_ecrc_enable)
358 return;
360 pdev = device_get_parent(device_get_parent(sc->dev));
361 if (pdev == NULL) {
362 device_printf(sc->dev, "could not find parent?\n");
363 return;
365 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
366 device_id = pci_read_config(pdev, PCIR_DEVICE, 2);
368 if (vendor_id != 0x10de)
369 return;
371 base = 0;
373 if (device_id == 0x005d) {
374 /* ck804, base address is magic */
375 base = 0xe0000000UL;
376 } else if (device_id >= 0x0374 && device_id <= 0x378) {
377 /* mcp55, base address stored in chipset */
378 mcp55 = pci_find_bsf(0, 0, 0);
379 if (mcp55 &&
380 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
381 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
382 word = pci_read_config(mcp55, 0x90, 2);
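/* Bits 1-14 of config word 0x90 apparently hold the extended config
 * window base in 32MB units, hence the mask and shift by 25 below. */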
383 base = ((unsigned long)word & 0x7ffeU) << 25;
386 if (!base)
387 return;
389 /* XXXX
390 Test below is commented because it is believed that doing
391 config read/write beyond 0xff will access the config space
392 for the next larger function. Uncomment this and remove
393 the hacky pmap_mapdev() way of accessing config space when
394 FreeBSD grows support for extended pcie config space access
396 #if 0
397 /* See if we can, by some miracle, access the extended
398 config space */
399 val = pci_read_config(pdev, 0x178, 4);
400 if (val != 0xffffffff) {
401 val |= 0x40;
402 pci_write_config(pdev, 0x178, val, 4);
403 return;
405 #endif
406 /* Rather than using normal pci config space writes, we must
407 * map the Nvidia config space ourselves. This is because on
408 * opteron/nvidia class machine the 0xe000000 mapping is
409 * handled by the nvidia chipset, that means the internal PCI
410 * device (the on-chip northbridge), or the amd-8131 bridge
411 * and things behind them are not visible by this method.
414 BUS_READ_IVAR(device_get_parent(pdev), pdev,
415 PCI_IVAR_BUS, &bus);
416 BUS_READ_IVAR(device_get_parent(pdev), pdev,
417 PCI_IVAR_SLOT, &slot);
418 BUS_READ_IVAR(device_get_parent(pdev), pdev,
419 PCI_IVAR_FUNCTION, &func);
420 BUS_READ_IVAR(device_get_parent(pdev), pdev,
421 PCI_IVAR_VENDOR, &ivend);
422 BUS_READ_IVAR(device_get_parent(pdev), pdev,
423 PCI_IVAR_DEVICE, &idev);
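/* The offset computed below matches the standard PCIe ECAM layout:
 * 1MB of config space per bus, 4KB per (slot, function) pair, with
 * devfn = 8 * slot + func. */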
425 off = base
426 + 0x00100000UL * (unsigned long)bus
427 + 0x00001000UL * (unsigned long)(func
428 + 8 * slot);
430 /* map it into the kernel */
431 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);
434 if (va == NULL) {
435 device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
436 return;
438 /* get a pointer to the config space mapped into the kernel */
439 cfgptr = va + (off & PAGE_MASK);
441 /* make sure that we can really access it */
442 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
443 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
444 if (! (vendor_id == ivend && device_id == idev)) {
445 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
446 vendor_id, device_id);
447 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
448 return;
451 ptr32 = (uint32_t*)(cfgptr + 0x178);
452 val = *ptr32;
454 if (val == 0xffffffff) {
455 device_printf(sc->dev, "extended mapping failed\n");
456 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
457 return;
459 *ptr32 = val | 0x40;
460 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
461 if (mxge_verbose)
462 device_printf(sc->dev,
463 "Enabled ECRC on upstream Nvidia bridge "
464 "at %d:%d:%d\n",
465 (int)bus, (int)slot, (int)func);
466 return;
468 #else
469 static void
470 mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
472 device_printf(sc->dev,
473 "Nforce 4 chipset on non-x86/x86_64!?!?!\n");
474 return;
476 #endif
479 static int
480 mxge_dma_test(mxge_softc_t *sc, int test_type)
482 mxge_cmd_t cmd;
483 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
484 int status;
485 uint32_t len;
486 char *test = " ";
489 /* Run a small DMA test.
490 * The magic multipliers to the length tell the firmware
491 * to do DMA read, write, or read+write tests. The
492 * results are returned in cmd.data0. The upper 16
493 * bits of the return are the number of transfers completed.
494 * The lower 16 bits are the time in 0.5us ticks that the
495 * transfers took to complete.
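/* The multipliers used below: cmd.data2 = len * 0x10000 requests a
 * read test, len * 0x1 a write test, and len * 0x10001 read+write.
 * Worked example (illustrative numbers): if cmd.data0 comes back as
 * (300 << 16) | 2000, then 300 transfers of len bytes completed in
 * 2000 * 0.5us = 1ms; for len = 2048 the code computes
 * 300 * 2048 * 2 / 2000 = 614, i.e. ~614 MB/s as reported by the
 * read_dma/write_dma sysctls. */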
498 len = sc->tx_boundary;
500 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
501 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
502 cmd.data2 = len * 0x10000;
503 status = mxge_send_cmd(sc, test_type, &cmd);
504 if (status != 0) {
505 test = "read";
506 goto abort;
508 sc->read_dma = ((cmd.data0>>16) * len * 2) /
509 (cmd.data0 & 0xffff);
510 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
511 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
512 cmd.data2 = len * 0x1;
513 status = mxge_send_cmd(sc, test_type, &cmd);
514 if (status != 0) {
515 test = "write";
516 goto abort;
518 sc->write_dma = ((cmd.data0>>16) * len * 2) /
519 (cmd.data0 & 0xffff);
521 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
522 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
523 cmd.data2 = len * 0x10001;
524 status = mxge_send_cmd(sc, test_type, &cmd);
525 if (status != 0) {
526 test = "read/write";
527 goto abort;
529 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
530 (cmd.data0 & 0xffff);
532 abort:
533 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
534 device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
535 test, status);
537 return status;
541 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
542 * when the PCI-E Completion packets are aligned on an 8-byte
543 * boundary. Some PCI-E chip sets always align Completion packets; on
544 * the ones that do not, the alignment can be enforced by enabling
545 * ECRC generation (if supported).
547 * When PCI-E Completion packets are not aligned, it is actually more
548 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
550 * If the driver can neither enable ECRC nor verify that it has
551 * already been enabled, then it must use a firmware image which works
552 * around unaligned completion packets (ethp_z8e.dat), and it should
553 * also ensure that it never gives the device a Read-DMA which is
554 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
555 * enabled, then the driver should use the aligned (eth_z8e.dat)
556 * firmware image, and set tx_boundary to 4KB.
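/* In short (summarizing the comment above):
 *   aligned completions (ECRC enabled or forced): eth_z8e, tx_boundary 4KB
 *   unaligned or unknown: ethp_z8e, tx_boundary 2KB
 * mxge_firmware_probe() below implements the detection. */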
559 static int
560 mxge_firmware_probe(mxge_softc_t *sc)
562 device_t dev = sc->dev;
563 int reg, status;
564 uint16_t pectl;
566 sc->tx_boundary = 4096;
568 * Verify the max read request size was set to 4KB
569 * before trying the test with 4KB.
571 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
572 pectl = pci_read_config(dev, reg + 0x8, 2);
573 if ((pectl & (5 << 12)) != (5 << 12)) {
574 device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
575 pectl);
576 sc->tx_boundary = 2048;
581 * load the optimized firmware (which assumes aligned PCIe
582 * completions) in order to see if it works on this host.
584 sc->fw_name = mxge_fw_aligned;
585 status = mxge_load_firmware(sc, 1);
586 if (status != 0) {
587 return status;
591 * Enable ECRC if possible
593 mxge_enable_nvidia_ecrc(sc);
596 * Run a DMA test which watches for unaligned completions and
597 * aborts on the first one seen.
600 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
601 if (status == 0)
602 return 0; /* keep the aligned firmware */
604 if (status != E2BIG)
605 device_printf(dev, "DMA test failed: %d\n", status);
606 if (status == ENOSYS)
607 device_printf(dev, "Falling back to ethp! "
608 "Please install up to date fw\n");
609 return status;
612 static int
613 mxge_select_firmware(mxge_softc_t *sc)
615 int aligned = 0;
618 if (mxge_force_firmware != 0) {
619 if (mxge_force_firmware == 1)
620 aligned = 1;
621 else
622 aligned = 0;
623 if (mxge_verbose)
624 device_printf(sc->dev,
625 "Assuming %s completions (forced)\n",
626 aligned ? "aligned" : "unaligned");
627 goto abort;
630 /* if the PCIe link width is 4 or less, we can use the aligned
631 firmware and skip any checks */
632 if (sc->link_width != 0 && sc->link_width <= 4) {
633 device_printf(sc->dev,
634 "PCIe x%d Link, expect reduced performance\n",
635 sc->link_width);
636 aligned = 1;
637 goto abort;
640 if (0 == mxge_firmware_probe(sc))
641 return 0;
643 abort:
644 if (aligned) {
645 sc->fw_name = mxge_fw_aligned;
646 sc->tx_boundary = 4096;
647 } else {
648 sc->fw_name = mxge_fw_unaligned;
649 sc->tx_boundary = 2048;
651 return (mxge_load_firmware(sc, 0));
654 union qualhack
656 const char *ro_char;
657 char *rw_char;
660 static int
661 mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
665 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
666 device_printf(sc->dev, "Bad firmware type: 0x%x\n",
667 be32toh(hdr->mcp_type));
668 return EIO;
671 /* save firmware version for sysctl */
672 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
673 if (mxge_verbose)
674 device_printf(sc->dev, "firmware id: %s\n", hdr->version);
676 ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
677 &sc->fw_ver_minor, &sc->fw_ver_tiny);
679 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
680 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
681 device_printf(sc->dev, "Found firmware version %s\n",
682 sc->fw_version);
683 device_printf(sc->dev, "Driver needs %d.%d\n",
684 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
685 return EINVAL;
687 return 0;
691 #if 0
692 static void *
693 z_alloc(void *nil, u_int items, u_int size)
695 void *ptr;
697 ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
698 return ptr;
701 static void
702 z_free(void *nil, void *ptr)
704 kfree(ptr, M_TEMP);
706 #endif
708 static int
709 mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
711 struct fw_image *fw;
712 const mcp_gen_header_t *hdr;
713 unsigned hdr_offset;
714 int status;
715 unsigned int i;
716 char dummy;
717 size_t fw_len;
719 fw = firmware_image_load(sc->fw_name, NULL);
720 if (fw == NULL) {
721 device_printf(sc->dev, "Could not find firmware image %s\n",
722 sc->fw_name);
723 return ENOENT;
725 #if 0
726 /* setup zlib and decompress f/w */
727 bzero(&zs, sizeof (zs));
728 zs.zalloc = z_alloc;
729 zs.zfree = z_free;
730 status = inflateInit(&zs);
731 if (status != Z_OK) {
732 status = EIO;
733 goto abort_with_fw;
736 /* the uncompressed size is stored as the firmware version,
737 which would otherwise go unused */
738 fw_len = (size_t) fw->version;
739 inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
740 if (inflate_buffer == NULL)
741 goto abort_with_zs;
742 zs.avail_in = fw->datasize;
743 zs.next_in = __DECONST(char *, fw->data);
744 zs.avail_out = fw_len;
745 zs.next_out = inflate_buffer;
746 status = inflate(&zs, Z_FINISH);
747 if (status != Z_STREAM_END) {
748 device_printf(sc->dev, "zlib %d\n", status);
749 status = EIO;
750 goto abort_with_buffer;
752 #endif
753 fw_len = fw->fw_imglen;
754 /* check id */
755 hdr_offset = htobe32(*(const uint32_t *)
756 (fw->fw_image + MCP_HEADER_PTR_OFFSET));
757 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
758 device_printf(sc->dev, "Bad firmware file");
759 status = EIO;
760 goto abort_with_fw;
762 hdr = (const void*)(fw->fw_image + hdr_offset);
764 status = mxge_validate_firmware(sc, hdr);
765 if (status != 0)
766 goto abort_with_fw;
768 /* Copy the inflated firmware to NIC SRAM. */
769 for (i = 0; i < fw_len; i += 256) {
770 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
771 fw->fw_image + i,
772 min(256U, (unsigned)(fw_len - i)));
773 wmb();
774 dummy = *sc->sram;
775 wmb();
778 *limit = fw_len;
779 status = 0;
780 #if 0
781 abort_with_buffer:
782 kfree(inflate_buffer, M_TEMP);
783 abort_with_zs:
784 inflateEnd(&zs);
785 #endif
786 abort_with_fw:
787 firmware_image_unload(fw);
788 return status;
792 * Enable or disable periodic RDMAs from the host to make certain
793 * chipsets resend dropped PCIe messages
796 static void
797 mxge_dummy_rdma(mxge_softc_t *sc, int enable)
799 char buf_bytes[72];
800 volatile uint32_t *confirm;
801 volatile char *submit;
802 uint32_t *buf, dma_low, dma_high;
803 int i;
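/* round buf up to an 8-byte boundary inside buf_bytes; the same
 * (x + 7) & ~7 idiom is used in mxge_send_cmd() below */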
805 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
807 /* clear confirmation addr */
808 confirm = (volatile uint32_t *)sc->cmd;
809 *confirm = 0;
810 wmb();
812 /* send an rdma command to the PCIe engine, and wait for the
813 response in the confirmation address. The firmware should
814 write a -1 there to indicate it is alive and well
817 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
818 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
819 buf[0] = htobe32(dma_high); /* confirm addr MSW */
820 buf[1] = htobe32(dma_low); /* confirm addr LSW */
821 buf[2] = htobe32(0xffffffff); /* confirm data */
822 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
823 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
824 buf[3] = htobe32(dma_high); /* dummy addr MSW */
825 buf[4] = htobe32(dma_low); /* dummy addr LSW */
826 buf[5] = htobe32(enable); /* enable? */
829 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);
831 mxge_pio_copy(submit, buf, 64);
832 wmb();
833 DELAY(1000);
834 wmb();
835 i = 0;
836 while (*confirm != 0xffffffff && i < 20) {
837 DELAY(1000);
838 i++;
840 if (*confirm != 0xffffffff) {
841 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
842 (enable ? "enable" : "disable"), confirm,
843 *confirm);
845 return;
848 static int
849 mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
851 mcp_cmd_t *buf;
852 char buf_bytes[sizeof(*buf) + 8];
853 volatile mcp_cmd_response_t *response = sc->cmd;
854 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
855 uint32_t dma_low, dma_high;
856 int err, sleep_total = 0;
859 * We may be called during attach, before if_serializer is available.
860 * This is not a fast path, just check for NULL
863 if (sc->ifp->if_serializer)
864 ASSERT_SERIALIZED(sc->ifp->if_serializer);
866 /* ensure buf is aligned to 8 bytes */
867 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
869 buf->data0 = htobe32(data->data0);
870 buf->data1 = htobe32(data->data1);
871 buf->data2 = htobe32(data->data2);
872 buf->cmd = htobe32(cmd);
873 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
874 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
876 buf->response_addr.low = htobe32(dma_low);
877 buf->response_addr.high = htobe32(dma_high);
880 response->result = 0xffffffff;
881 wmb();
882 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));
884 /* wait up to 20ms */
885 err = EAGAIN;
886 for (sleep_total = 0; sleep_total < 20; sleep_total++) {
887 bus_dmamap_sync(sc->cmd_dma.dmat,
888 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
889 wmb();
890 switch (be32toh(response->result)) {
891 case 0:
892 data->data0 = be32toh(response->data);
893 err = 0;
894 break;
895 case 0xffffffff:
896 DELAY(1000);
897 break;
898 case MXGEFW_CMD_UNKNOWN:
899 err = ENOSYS;
900 break;
901 case MXGEFW_CMD_ERROR_UNALIGNED:
902 err = E2BIG;
903 break;
904 case MXGEFW_CMD_ERROR_BUSY:
905 err = EBUSY;
906 break;
907 default:
908 device_printf(sc->dev,
909 "mxge: command %d "
910 "failed, result = %d\n",
911 cmd, be32toh(response->result));
912 err = ENXIO;
913 break;
915 if (err != EAGAIN)
916 break;
918 if (err == EAGAIN)
919 device_printf(sc->dev, "mxge: command %d timed out, "
920 "result = %d\n",
921 cmd, be32toh(response->result));
922 return err;
925 static int
926 mxge_adopt_running_firmware(mxge_softc_t *sc)
928 struct mcp_gen_header *hdr;
929 const size_t bytes = sizeof (struct mcp_gen_header);
930 size_t hdr_offset;
931 int status;
933 /* find running firmware header */
934 hdr_offset = htobe32(*(volatile uint32_t *)
935 (sc->sram + MCP_HEADER_PTR_OFFSET));
937 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
938 device_printf(sc->dev,
939 "Running firmware has bad header offset (%d)\n",
940 (int)hdr_offset);
941 return EIO;
944 /* copy header of running firmware from SRAM to host memory to
945 * validate firmware */
946 hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
947 if (hdr == NULL) {
948 device_printf(sc->dev, "could not kmalloc firmware hdr\n");
949 return ENOMEM;
951 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
952 rman_get_bushandle(sc->mem_res),
953 hdr_offset, (char *)hdr, bytes);
954 status = mxge_validate_firmware(sc, hdr);
955 kfree(hdr, M_DEVBUF);
958 * check to see if adopted firmware has bug where adopting
959 * it will cause broadcasts to be filtered unless the NIC
960 * is kept in ALLMULTI mode
962 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
963 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
964 sc->adopted_rx_filter_bug = 1;
965 device_printf(sc->dev, "Adopting fw %d.%d.%d: "
966 "working around rx filter bug\n",
967 sc->fw_ver_major, sc->fw_ver_minor,
968 sc->fw_ver_tiny);
971 return status;
975 static int
976 mxge_load_firmware(mxge_softc_t *sc, int adopt)
978 volatile uint32_t *confirm;
979 volatile char *submit;
980 char buf_bytes[72];
981 uint32_t *buf, size, dma_low, dma_high;
982 int status, i;
984 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);
986 size = sc->sram_size;
987 status = mxge_load_firmware_helper(sc, &size);
988 if (status) {
989 if (!adopt)
990 return status;
991 /* Try to use the currently running firmware, if
992 it is new enough */
993 status = mxge_adopt_running_firmware(sc);
994 if (status) {
995 device_printf(sc->dev,
996 "failed to adopt running firmware\n");
997 return status;
999 device_printf(sc->dev,
1000 "Successfully adopted running firmware\n");
1001 if (sc->tx_boundary == 4096) {
1002 device_printf(sc->dev,
1003 "Using firmware currently running on NIC"
1004 ". For optimal\n");
1005 device_printf(sc->dev,
1006 "performance consider loading optimized "
1007 "firmware\n");
1009 sc->fw_name = mxge_fw_unaligned;
1010 sc->tx_boundary = 2048;
1011 return 0;
1013 /* clear confirmation addr */
1014 confirm = (volatile uint32_t *)sc->cmd;
1015 *confirm = 0;
1016 wmb();
1017 /* send a reload command to the bootstrap MCP, and wait for the
1018 response in the confirmation address. The firmware should
1019 write a -1 there to indicate it is alive and well
1022 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
1023 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
1025 buf[0] = htobe32(dma_high); /* confirm addr MSW */
1026 buf[1] = htobe32(dma_low); /* confirm addr LSW */
1027 buf[2] = htobe32(0xffffffff); /* confirm data */
1029 /* FIX: All newest firmware should un-protect the bottom of
1030 the sram before handoff. However, the very first interfaces
1031 do not. Therefore the handoff copy must skip the first 8 bytes
1033 /* where the code starts*/
1034 buf[3] = htobe32(MXGE_FW_OFFSET + 8);
1035 buf[4] = htobe32(size - 8); /* length of code */
1036 buf[5] = htobe32(8); /* where to copy to */
1037 buf[6] = htobe32(0); /* where to jump to */
1039 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
1040 mxge_pio_copy(submit, buf, 64);
1041 wmb();
1042 DELAY(1000);
1043 wmb();
1044 i = 0;
1045 while (*confirm != 0xffffffff && i < 20) {
1046 DELAY(1000*10);
1047 i++;
1048 bus_dmamap_sync(sc->cmd_dma.dmat,
1049 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
1051 if (*confirm != 0xffffffff) {
1052 device_printf(sc->dev,"handoff failed (%p = 0x%x)",
1053 confirm, *confirm);
1055 return ENXIO;
1057 return 0;
1060 static int
1061 mxge_update_mac_address(mxge_softc_t *sc)
1063 mxge_cmd_t cmd;
1064 uint8_t *addr = sc->mac_addr;
1065 int status;
1068 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
1069 | (addr[2] << 8) | addr[3]);
1071 cmd.data1 = ((addr[4] << 8) | (addr[5]));
1073 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
1074 return status;
1077 static int
1078 mxge_change_pause(mxge_softc_t *sc, int pause)
1080 mxge_cmd_t cmd;
1081 int status;
1083 if (pause)
1084 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
1085 &cmd);
1086 else
1087 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
1088 &cmd);
1090 if (status) {
1091 device_printf(sc->dev, "Failed to set flow control mode\n");
1092 return ENXIO;
1094 sc->pause = pause;
1095 return 0;
1098 static void
1099 mxge_change_promisc(mxge_softc_t *sc, int promisc)
1101 mxge_cmd_t cmd;
1102 int status;
1104 if (sc->ifp->if_serializer)
1105 ASSERT_SERIALIZED(sc->ifp->if_serializer);
1106 if (mxge_always_promisc)
1107 promisc = 1;
1109 if (promisc)
1110 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
1111 &cmd);
1112 else
1113 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
1114 &cmd);
1116 if (status) {
1117 device_printf(sc->dev, "Failed to set promisc mode\n");
1121 static void
1122 mxge_set_multicast_list(mxge_softc_t *sc)
1124 mxge_cmd_t cmd;
1125 struct ifmultiaddr *ifma;
1126 struct ifnet *ifp = sc->ifp;
1127 int err;
1129 if (ifp->if_serializer)
1130 ASSERT_SERIALIZED(ifp->if_serializer);
1132 /* This firmware is known to not support multicast */
1133 if (!sc->fw_multicast_support)
1134 return;
1136 /* Disable multicast filtering while we play with the lists*/
1137 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
1138 if (err != 0) {
1139 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
1140 " error status: %d\n", err);
1141 return;
1144 if (sc->adopted_rx_filter_bug)
1145 return;
1147 if (ifp->if_flags & IFF_ALLMULTI)
1148 /* request to disable multicast filtering, so quit here */
1149 return;
1151 /* Flush all the filters */
1153 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
1154 if (err != 0) {
1155 device_printf(sc->dev,
1156 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1157 ", error status: %d\n", err);
1158 return;
1161 /* Walk the multicast list, and add each address */
1163 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1164 if (ifma->ifma_addr->sa_family != AF_LINK)
1165 continue;
1166 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1167 &cmd.data0, 4);
1168 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
1169 &cmd.data1, 2);
1170 cmd.data0 = htonl(cmd.data0);
1171 cmd.data1 = htonl(cmd.data1);
1172 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
1173 if (err != 0) {
1174 device_printf(sc->dev, "Failed "
1175 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1176 "%d\t", err);
1177 /* abort, leaving multicast filtering off */
1178 return;
1181 /* Enable multicast filtering */
1182 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
1183 if (err != 0) {
1184 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
1185 ", error status: %d\n", err);
1189 static int
1190 mxge_max_mtu(mxge_softc_t *sc)
1192 mxge_cmd_t cmd;
1193 int status;
1195 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
1196 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1198 /* try to set nbufs to see if we can
1199 use virtually contiguous jumbos */
1200 cmd.data0 = 0;
1201 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
1202 &cmd);
1203 if (status == 0)
1204 return MXGEFW_MAX_MTU - MXGEFW_PAD;
1206 /* otherwise, we're limited to MJUMPAGESIZE */
1207 return MJUMPAGESIZE - MXGEFW_PAD;
1210 static int
1211 mxge_reset(mxge_softc_t *sc, int interrupts_setup)
1213 struct mxge_slice_state *ss;
1214 mxge_rx_done_t *rx_done;
1215 volatile uint32_t *irq_claim;
1216 mxge_cmd_t cmd;
1217 int slice, status;
1219 /* try to send a reset command to the card to see if it
1220 is alive */
1221 memset(&cmd, 0, sizeof (cmd));
1222 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
1223 if (status != 0) {
1224 device_printf(sc->dev, "failed reset\n");
1225 return ENXIO;
1228 mxge_dummy_rdma(sc, 1);
1231 /* set the intrq size */
1232 cmd.data0 = sc->rx_ring_size;
1233 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
1236 * Even though we already know how many slices are supported
1237 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1238 * has magic side effects, and must be called after a reset.
1239 * It must be called prior to calling any RSS related cmds,
1240 * including assigning an interrupt queue for anything but
1241 * slice 0. It must also be called *after*
1242 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1243 * the firmware to compute offsets.
1246 if (sc->num_slices > 1) {
1247 /* ask the maximum number of slices it supports */
1248 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
1249 &cmd);
1250 if (status != 0) {
1251 device_printf(sc->dev,
1252 "failed to get number of slices\n");
1253 return status;
1256 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1257 * to setting up the interrupt queue DMA
1259 cmd.data0 = sc->num_slices;
1260 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
1261 #ifdef IFNET_BUF_RING
1262 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
1263 #endif
1264 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
1265 &cmd);
1266 if (status != 0) {
1267 device_printf(sc->dev,
1268 "failed to set number of slices\n");
1269 return status;
1274 if (interrupts_setup) {
1275 /* Now exchange information about interrupts */
1276 for (slice = 0; slice < sc->num_slices; slice++) {
1277 rx_done = &sc->ss[slice].rx_done;
1278 memset(rx_done->entry, 0, sc->rx_ring_size);
1279 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
1280 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
1281 cmd.data2 = slice;
1282 status |= mxge_send_cmd(sc,
1283 MXGEFW_CMD_SET_INTRQ_DMA,
1284 &cmd);
1288 status |= mxge_send_cmd(sc,
1289 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);
1292 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);
1294 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
1295 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);
1298 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
1299 &cmd);
1300 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
1301 if (status != 0) {
1302 device_printf(sc->dev, "failed set interrupt parameters\n");
1303 return status;
1307 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);
1310 /* run a DMA benchmark */
1311 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST);
1313 for (slice = 0; slice < sc->num_slices; slice++) {
1314 ss = &sc->ss[slice];
1316 ss->irq_claim = irq_claim + (2 * slice);
1317 /* reset mcp/driver shared state back to 0 */
1318 ss->rx_done.idx = 0;
1319 ss->rx_done.cnt = 0;
1320 ss->tx.req = 0;
1321 ss->tx.done = 0;
1322 ss->tx.pkt_done = 0;
1323 ss->tx.queue_active = 0;
1324 ss->tx.activate = 0;
1325 ss->tx.deactivate = 0;
1326 ss->tx.wake = 0;
1327 ss->tx.defrag = 0;
1328 ss->tx.stall = 0;
1329 ss->rx_big.cnt = 0;
1330 ss->rx_small.cnt = 0;
1331 ss->lro_bad_csum = 0;
1332 ss->lro_queued = 0;
1333 ss->lro_flushed = 0;
1334 if (ss->fw_stats != NULL) {
1335 ss->fw_stats->valid = 0;
1336 ss->fw_stats->send_done_count = 0;
1339 sc->rdma_tags_available = 15;
1340 status = mxge_update_mac_address(sc);
1341 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
1342 mxge_change_pause(sc, sc->pause);
1343 mxge_set_multicast_list(sc);
1344 return status;
1347 static int
1348 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
1350 mxge_softc_t *sc;
1351 unsigned int intr_coal_delay;
1352 int err;
1354 sc = arg1;
1355 intr_coal_delay = sc->intr_coal_delay;
1356 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
1357 if (err != 0) {
1358 return err;
1360 if (intr_coal_delay == sc->intr_coal_delay)
1361 return 0;
1363 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
1364 return EINVAL;
1366 lwkt_serialize_enter(sc->ifp->if_serializer);
1367 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
1368 sc->intr_coal_delay = intr_coal_delay;
1370 lwkt_serialize_exit(sc->ifp->if_serializer);
1371 return err;
1374 static int
1375 mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
1377 mxge_softc_t *sc;
1378 unsigned int enabled;
1379 int err;
1381 sc = arg1;
1382 enabled = sc->pause;
1383 err = sysctl_handle_int(oidp, &enabled, arg2, req);
1384 if (err != 0) {
1385 return err;
1387 if (enabled == sc->pause)
1388 return 0;
1390 lwkt_serialize_enter(sc->ifp->if_serializer);
1391 err = mxge_change_pause(sc, enabled);
1392 lwkt_serialize_exit(sc->ifp->if_serializer);
1393 return err;
1396 static int
1397 mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
1399 struct ifnet *ifp;
1400 int err = 0;
1402 ifp = sc->ifp;
1403 if (lro_cnt == 0)
1404 ifp->if_capenable &= ~IFCAP_LRO;
1405 else
1406 ifp->if_capenable |= IFCAP_LRO;
1407 sc->lro_cnt = lro_cnt;
1408 if (ifp->if_flags & IFF_RUNNING) {
1409 mxge_close(sc);
1410 err = mxge_open(sc);
1412 return err;
1415 static int
1416 mxge_change_lro(SYSCTL_HANDLER_ARGS)
1418 mxge_softc_t *sc;
1419 unsigned int lro_cnt;
1420 int err;
1422 sc = arg1;
1423 lro_cnt = sc->lro_cnt;
1424 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
1425 if (err != 0)
1426 return err;
1428 if (lro_cnt == sc->lro_cnt)
1429 return 0;
1431 if (lro_cnt > 128)
1432 return EINVAL;
1434 lwkt_serialize_enter(sc->ifp->if_serializer);
1435 err = mxge_change_lro_locked(sc, lro_cnt);
1436 lwkt_serialize_exit(sc->ifp->if_serializer);
1437 return err;
1440 static int
1441 mxge_handle_be32(SYSCTL_HANDLER_ARGS)
1443 int err;
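/* The firmware keeps its stats block in network (big-endian) byte
 * order; swap the value into arg2 and pass a NULL pointer so that
 * sysctl_handle_int() simply reports the swapped value read-only. */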
1445 if (arg1 == NULL)
1446 return EFAULT;
1447 arg2 = be32toh(*(int *)arg1);
1448 arg1 = NULL;
1449 err = sysctl_handle_int(oidp, arg1, arg2, req);
1451 return err;
1454 static void
1455 mxge_rem_sysctls(mxge_softc_t *sc)
1457 struct mxge_slice_state *ss;
1458 int slice;
1460 if (sc->slice_sysctl_tree == NULL)
1461 return;
1463 for (slice = 0; slice < sc->num_slices; slice++) {
1464 ss = &sc->ss[slice];
1465 if (ss == NULL || ss->sysctl_tree == NULL)
1466 continue;
1467 sysctl_ctx_free(&ss->sysctl_ctx);
1468 ss->sysctl_tree = NULL;
1470 sysctl_ctx_free(&sc->slice_sysctl_ctx);
1471 sc->slice_sysctl_tree = NULL;
1472 sysctl_ctx_free(&sc->sysctl_ctx);
1473 sc->sysctl_tree = NULL;
1477 static void
1478 mxge_add_sysctls(mxge_softc_t *sc)
1480 struct sysctl_ctx_list *ctx;
1481 struct sysctl_oid_list *children;
1482 mcp_irq_data_t *fw;
1483 struct mxge_slice_state *ss;
1484 int slice;
1485 char slice_num[8];
1487 ctx = &sc->sysctl_ctx;
1488 sysctl_ctx_init(ctx);
1489 sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
1490 OID_AUTO,
1491 device_get_nameunit(sc->dev),
1492 CTLFLAG_RD, 0, "");
1493 if (sc->sysctl_tree == NULL) {
1494 device_printf(sc->dev, "can't add sysctl node\n");
1495 return;
1498 children = SYSCTL_CHILDREN(sc->sysctl_tree);
1499 fw = sc->ss[0].fw_stats;
1501 /* random information */
1502 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1503 "firmware_version",
1504 CTLFLAG_RD, &sc->fw_version,
1505 0, "firmware version");
1506 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1507 "serial_number",
1508 CTLFLAG_RD, &sc->serial_number_string,
1509 0, "serial number");
1510 SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
1511 "product_code",
1512 CTLFLAG_RD, &sc->product_code_string,
1513 0, "product_code");
1514 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1515 "pcie_link_width",
1516 CTLFLAG_RD, &sc->link_width,
1517 0, "tx_boundary");
1518 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1519 "tx_boundary",
1520 CTLFLAG_RD, &sc->tx_boundary,
1521 0, "tx_boundary");
1522 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1523 "write_combine",
1524 CTLFLAG_RD, &sc->wc,
1525 0, "write combining PIO?");
1526 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1527 "read_dma_MBs",
1528 CTLFLAG_RD, &sc->read_dma,
1529 0, "DMA Read speed in MB/s");
1530 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1531 "write_dma_MBs",
1532 CTLFLAG_RD, &sc->write_dma,
1533 0, "DMA Write speed in MB/s");
1534 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1535 "read_write_dma_MBs",
1536 CTLFLAG_RD, &sc->read_write_dma,
1537 0, "DMA concurrent Read/Write speed in MB/s");
1540 /* performance related tunables */
1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1542 "intr_coal_delay",
1543 CTLTYPE_INT|CTLFLAG_RW, sc,
1544 0, mxge_change_intr_coal,
1545 "I", "interrupt coalescing delay in usecs");
1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1548 "flow_control_enabled",
1549 CTLTYPE_INT|CTLFLAG_RW, sc,
1550 0, mxge_change_flow_control,
1551 "I", "interrupt coalescing delay in usecs");
1553 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1554 "deassert_wait",
1555 CTLFLAG_RW, &mxge_deassert_wait,
1556 0, "Wait for IRQ line to go low in ihandler");
1558 /* stats block from firmware is in network byte order.
1559 Need to swap it */
1560 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1561 "link_up",
1562 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
1563 0, mxge_handle_be32,
1564 "I", "link up");
1565 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1566 "rdma_tags_available",
1567 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
1568 0, mxge_handle_be32,
1569 "I", "rdma_tags_available");
1570 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1571 "dropped_bad_crc32",
1572 CTLTYPE_INT|CTLFLAG_RD,
1573 &fw->dropped_bad_crc32,
1574 0, mxge_handle_be32,
1575 "I", "dropped_bad_crc32");
1576 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1577 "dropped_bad_phy",
1578 CTLTYPE_INT|CTLFLAG_RD,
1579 &fw->dropped_bad_phy,
1580 0, mxge_handle_be32,
1581 "I", "dropped_bad_phy");
1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1583 "dropped_link_error_or_filtered",
1584 CTLTYPE_INT|CTLFLAG_RD,
1585 &fw->dropped_link_error_or_filtered,
1586 0, mxge_handle_be32,
1587 "I", "dropped_link_error_or_filtered");
1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1589 "dropped_link_overflow",
1590 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
1591 0, mxge_handle_be32,
1592 "I", "dropped_link_overflow");
1593 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1594 "dropped_multicast_filtered",
1595 CTLTYPE_INT|CTLFLAG_RD,
1596 &fw->dropped_multicast_filtered,
1597 0, mxge_handle_be32,
1598 "I", "dropped_multicast_filtered");
1599 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1600 "dropped_no_big_buffer",
1601 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
1602 0, mxge_handle_be32,
1603 "I", "dropped_no_big_buffer");
1604 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1605 "dropped_no_small_buffer",
1606 CTLTYPE_INT|CTLFLAG_RD,
1607 &fw->dropped_no_small_buffer,
1608 0, mxge_handle_be32,
1609 "I", "dropped_no_small_buffer");
1610 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1611 "dropped_overrun",
1612 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
1613 0, mxge_handle_be32,
1614 "I", "dropped_overrun");
1615 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1616 "dropped_pause",
1617 CTLTYPE_INT|CTLFLAG_RD,
1618 &fw->dropped_pause,
1619 0, mxge_handle_be32,
1620 "I", "dropped_pause");
1621 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1622 "dropped_runt",
1623 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
1624 0, mxge_handle_be32,
1625 "I", "dropped_runt");
1627 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1628 "dropped_unicast_filtered",
1629 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
1630 0, mxge_handle_be32,
1631 "I", "dropped_unicast_filtered");
1633 /* verbose printing? */
1634 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1635 "verbose",
1636 CTLFLAG_RW, &mxge_verbose,
1637 0, "verbose printing");
1639 /* lro */
1640 SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
1641 "lro_cnt",
1642 CTLTYPE_INT|CTLFLAG_RW, sc,
1643 0, mxge_change_lro,
1644 "I", "number of lro merge queues");
1647 /* add counters exported for debugging from all slices */
1648 sysctl_ctx_init(&sc->slice_sysctl_ctx);
1649 sc->slice_sysctl_tree =
1650 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
1651 "slice", CTLFLAG_RD, 0, "");
1653 for (slice = 0; slice < sc->num_slices; slice++) {
1654 ss = &sc->ss[slice];
1655 sysctl_ctx_init(&ss->sysctl_ctx);
1656 ctx = &ss->sysctl_ctx;
1657 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
1658 ksprintf(slice_num, "%d", slice);
1659 ss->sysctl_tree =
1660 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
1661 CTLFLAG_RD, 0, "");
1662 children = SYSCTL_CHILDREN(ss->sysctl_tree);
1663 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1664 "rx_small_cnt",
1665 CTLFLAG_RD, &ss->rx_small.cnt,
1666 0, "rx_small_cnt");
1667 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1668 "rx_big_cnt",
1669 CTLFLAG_RD, &ss->rx_big.cnt,
1670 0, "rx_small_cnt");
1671 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1672 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
1673 0, "number of lro merge queues flushed");
1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1676 "lro_queued", CTLFLAG_RD, &ss->lro_queued,
1677 0, "number of frames appended to lro merge"
1678 "queues");
1680 #ifndef IFNET_BUF_RING
1681 /* only transmit from slice 0 for now */
1682 if (slice > 0)
1683 continue;
1684 #endif
1685 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1686 "tx_req",
1687 CTLFLAG_RD, &ss->tx.req,
1688 0, "tx_req");
1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1691 "tx_done",
1692 CTLFLAG_RD, &ss->tx.done,
1693 0, "tx_done");
1694 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1695 "tx_pkt_done",
1696 CTLFLAG_RD, &ss->tx.pkt_done,
1697 0, "tx_done");
1698 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1699 "tx_stall",
1700 CTLFLAG_RD, &ss->tx.stall,
1701 0, "tx_stall");
1702 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1703 "tx_wake",
1704 CTLFLAG_RD, &ss->tx.wake,
1705 0, "tx_wake");
1706 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1707 "tx_defrag",
1708 CTLFLAG_RD, &ss->tx.defrag,
1709 0, "tx_defrag");
1710 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1711 "tx_queue_active",
1712 CTLFLAG_RD, &ss->tx.queue_active,
1713 0, "tx_queue_active");
1714 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1715 "tx_activate",
1716 CTLFLAG_RD, &ss->tx.activate,
1717 0, "tx_activate");
1718 SYSCTL_ADD_INT(ctx, children, OID_AUTO,
1719 "tx_deactivate",
1720 CTLFLAG_RD, &ss->tx.deactivate,
1721 0, "tx_deactivate");
1725 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1726 backwards one at a time and handle ring wraps */
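/* Writing the later slots first means the firmware never sees a
 * partially written chain that wraps the ring; the first slot, whose
 * flags make the chain valid, is submitted last by the caller
 * (mxge_submit_req() below). */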
1728 static inline void
1729 mxge_submit_req_backwards(mxge_tx_ring_t *tx,
1730 mcp_kreq_ether_send_t *src, int cnt)
1732 int idx, starting_slot;
1733 starting_slot = tx->req;
1734 while (cnt > 1) {
1735 cnt--;
1736 idx = (starting_slot + cnt) & tx->mask;
1737 mxge_pio_copy(&tx->lanai[idx],
1738 &src[cnt], sizeof(*src));
1739 wmb();
1744 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1745 * at most 32 bytes at a time, so as to avoid involving the software
1746 * pio handler in the nic. We re-write the first segment's flags
1747 * to mark them valid only after writing the entire chain
1750 static inline void
1751 mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
1752 int cnt)
1754 int idx, i;
1755 uint32_t *src_ints;
1756 volatile uint32_t *dst_ints;
1757 mcp_kreq_ether_send_t *srcp;
1758 volatile mcp_kreq_ether_send_t *dstp, *dst;
1759 uint8_t last_flags;
1761 idx = tx->req & tx->mask;
1763 last_flags = src->flags;
1764 src->flags = 0;
1765 wmb();
1766 dst = dstp = &tx->lanai[idx];
1767 srcp = src;
1769 if ((idx + cnt) < tx->mask) {
1770 for (i = 0; i < (cnt - 1); i += 2) {
1771 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
1772 wmb(); /* force write every 32 bytes */
1773 srcp += 2;
1774 dstp += 2;
1776 } else {
1777 /* submit all but the first request, and ensure
1778 that it is submitted below */
1779 mxge_submit_req_backwards(tx, src, cnt);
1780 i = 0;
1782 if (i < cnt) {
1783 /* submit the first request */
1784 mxge_pio_copy(dstp, srcp, sizeof(*src));
1785 wmb(); /* barrier before setting valid flag */
1788 /* re-write the last 32-bits with the valid flags */
1789 src->flags = last_flags;
1790 src_ints = (uint32_t *)src;
1791 src_ints+=3;
1792 dst_ints = (volatile uint32_t *)dst;
1793 dst_ints+=3;
1794 *dst_ints = *src_ints;
1795 tx->req += cnt;
1796 wmb();
1799 #if IFCAP_TSO4
1801 static void
1802 mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
1803 int busdma_seg_cnt, int ip_off)
1805 mxge_tx_ring_t *tx;
1806 mcp_kreq_ether_send_t *req;
1807 bus_dma_segment_t *seg;
1808 struct ip *ip;
1809 struct tcphdr *tcp;
1810 uint32_t low, high_swapped;
1811 int len, seglen, cum_len, cum_len_next;
1812 int next_is_first, chop, cnt, rdma_count, small;
1813 uint16_t pseudo_hdr_offset, cksum_offset, mss;
1814 uint8_t flags, flags_next;
1815 static int once;
1817 mss = m->m_pkthdr.tso_segsz;
1819 /* negative cum_len signifies to the
1820 * send loop that we are still in the
1821 * header portion of the TSO packet.
1824 /* ensure we have the ethernet, IP and TCP
1825 header together in the first mbuf, copy
1826 it to a scratch buffer if not */
1827 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
1828 m_copydata(m, 0, ip_off + sizeof (*ip),
1829 ss->scratch);
1830 ip = (struct ip *)(ss->scratch + ip_off);
1831 } else {
1832 ip = (struct ip *)(mtod(m, char *) + ip_off);
1834 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
1835 + sizeof (*tcp))) {
1836 m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
1837 + sizeof (*tcp), ss->scratch);
1838 ip = (struct ip *)(ss->scratch + ip_off);
1841 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
1842 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));
1844 /* TSO implies checksum offload on this hardware */
1845 cksum_offset = ip_off + (ip->ip_hl << 2);
1846 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;
1849 /* for TSO, pseudo_hdr_offset holds mss.
1850 * The firmware figures out where to put
1851 * the checksum by parsing the header. */
1852 pseudo_hdr_offset = htobe16(mss);
1854 tx = &ss->tx;
1855 req = tx->req_list;
1856 seg = tx->seg_list;
1857 cnt = 0;
1858 rdma_count = 0;
1859 /* "rdma_count" is the number of RDMAs belonging to the
1860 * current packet BEFORE the current send request. For
1861 * non-TSO packets, this is equal to "count".
1862 * For TSO packets, rdma_count needs to be reset
1863 * to 0 after a segment cut.
1865 * The rdma_count field of the send request is
1866 * the number of RDMAs of the packet starting at
1867 * that request. For TSO send requests with one or more cuts
1868 * in the middle, this is the number of RDMAs starting
1869 * after the last cut in the request. All previous
1870 * segments before the last cut implicitly have 1 RDMA.
1872 * Since the number of RDMAs is not known beforehand,
1873 * it must be filled-in retroactively - after each
1874 * segmentation cut or at the end of the entire packet.
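* Illustrative example (hypothetical chain): for a TSO packet with a
* single cut, each descriptor before the cut implicitly carries
* rdma_count = 1, and the descriptor at the cut is patched afterwards
* through (req - rdma_count) with the number of RDMAs from the cut to
* the end of the packet.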
1877 while (busdma_seg_cnt) {
1878 /* Break the busdma segment up into pieces*/
1879 low = MXGE_LOWPART_TO_U32(seg->ds_addr);
1880 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
1881 len = seg->ds_len;
1883 while (len) {
1884 flags_next = flags & ~MXGEFW_FLAGS_FIRST;
1885 seglen = len;
1886 cum_len_next = cum_len + seglen;
1887 (req-rdma_count)->rdma_count = rdma_count + 1;
1888 if (__predict_true(cum_len >= 0)) {
1889 /* payload */
1890 chop = (cum_len_next > mss);
1891 cum_len_next = cum_len_next % mss;
1892 next_is_first = (cum_len_next == 0);
1893 flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
1894 flags_next |= next_is_first *
1895 MXGEFW_FLAGS_FIRST;
1896 rdma_count |= -(chop | next_is_first);
1897 rdma_count += chop & !next_is_first;
1898 } else if (cum_len_next >= 0) {
1899 /* header ends */
1900 rdma_count = -1;
1901 cum_len_next = 0;
1902 seglen = -cum_len;
1903 small = (mss <= MXGEFW_SEND_SMALL_SIZE);
1904 flags_next = MXGEFW_FLAGS_TSO_PLD |
1905 MXGEFW_FLAGS_FIRST |
1906 (small * MXGEFW_FLAGS_SMALL);
1909 req->addr_high = high_swapped;
1910 req->addr_low = htobe32(low);
1911 req->pseudo_hdr_offset = pseudo_hdr_offset;
1912 req->pad = 0;
1913 req->rdma_count = 1;
1914 req->length = htobe16(seglen);
1915 req->cksum_offset = cksum_offset;
1916 req->flags = flags | ((cum_len & 1) *
1917 MXGEFW_FLAGS_ALIGN_ODD);
1918 low += seglen;
1919 len -= seglen;
1920 cum_len = cum_len_next;
1921 flags = flags_next;
1922 req++;
1923 cnt++;
1924 rdma_count++;
1925 if (__predict_false(cksum_offset > seglen))
1926 cksum_offset -= seglen;
1927 else
1928 cksum_offset = 0;
1929 if (__predict_false(cnt > tx->max_desc))
1930 goto drop;
1932 busdma_seg_cnt--;
1933 seg++;
1935 (req-rdma_count)->rdma_count = rdma_count;
1937 do {
1938 req--;
1939 req->flags |= MXGEFW_FLAGS_TSO_LAST;
1940 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));
1942 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
1943 mxge_submit_req(tx, tx->req_list, cnt);
1944 #ifdef IFNET_BUF_RING
1945 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
1946 /* tell the NIC to start polling this slice */
1947 *tx->send_go = 1;
1948 tx->queue_active = 1;
1949 tx->activate++;
1950 wmb();
1952 #endif
1953 return;
1955 drop:
1956 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
1957 m_freem(m);
1958 ss->oerrors++;
1959 if (!once) {
1960 kprintf("tx->max_desc exceeded via TSO!\n");
1961 kprintf("mss = %d, %ld, %d!\n", mss,
1962 (long)seg - (long)tx->seg_list, tx->max_desc);
1963 once = 1;
1965 return;
1969 #endif /* IFCAP_TSO4 */
1971 #ifdef MXGE_NEW_VLAN_API
1973 * We reproduce the software vlan tag insertion from
1974 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1975 * vlan tag insertion. We need to advertise this in order to have the
1976 * vlan interface respect our csum offload flags.
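/* Sketch of the transformation below (assuming a standard DIX frame):
 *   before: | dst(6) | src(6) | type(2) | payload |
 *   after:  | dst(6) | src(6) | 0x8100(2) | tag(2) | type(2) | payload |
 * M_PREPEND makes room for the extra EVL_ENCAPLEN (4) bytes and the
 * original dst/src are copied down to the new front, leaving the gap
 * for the 802.1Q header just before the type field. */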
1978 static struct mbuf *
1979 mxge_vlan_tag_insert(struct mbuf *m)
1981 struct ether_vlan_header *evl;
1983 M_PREPEND(m, EVL_ENCAPLEN, MB_DONTWAIT);
1984 if (__predict_false(m == NULL))
1985 return NULL;
1986 if (m->m_len < sizeof(*evl)) {
1987 m = m_pullup(m, sizeof(*evl));
1988 if (__predict_false(m == NULL))
1989 return NULL;
1992 * Transform the Ethernet header into an Ethernet header
1993 * with 802.1Q encapsulation.
1995 evl = mtod(m, struct ether_vlan_header *);
1996 bcopy((char *)evl + EVL_ENCAPLEN,
1997 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1998 evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1999 evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
2000 m->m_flags &= ~M_VLANTAG;
2001 return m;
2003 #endif /* MXGE_NEW_VLAN_API */
2005 static void
2006 mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
2008 mxge_softc_t *sc;
2009 mcp_kreq_ether_send_t *req;
2010 bus_dma_segment_t *seg;
2011 struct mbuf *m_tmp;
2012 struct ifnet *ifp;
2013 mxge_tx_ring_t *tx;
2014 struct ip *ip;
2015 int cnt, cum_len, err, i, idx, odd_flag, ip_off;
2016 uint16_t pseudo_hdr_offset;
2017 uint8_t flags, cksum_offset;
2020 sc = ss->sc;
2021 ifp = sc->ifp;
2022 tx = &ss->tx;
2024 ip_off = sizeof (struct ether_header);
2025 #ifdef MXGE_NEW_VLAN_API
2026 if (m->m_flags & M_VLANTAG) {
2027 m = mxge_vlan_tag_insert(m);
2028 if (__predict_false(m == NULL))
2029 goto drop;
2030 ip_off += EVL_ENCAPLEN;
2032 #endif
2033 /* (try to) map the frame for DMA */
2034 idx = tx->req & tx->mask;
2035 err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map,
2036 m, tx->seg_list, 1, &cnt,
2037 BUS_DMA_NOWAIT);
2038 if (__predict_false(err == EFBIG)) {
2039 /* Too many segments in the chain. Try
2040 to defrag */
2041 m_tmp = m_defrag(m, M_NOWAIT);
2042 if (m_tmp == NULL) {
2043 goto drop;
2045 ss->tx.defrag++;
2046 m = m_tmp;
2047 err = bus_dmamap_load_mbuf_segment(tx->dmat,
2048 tx->info[idx].map,
2049 m, tx->seg_list, 1, &cnt,
2050 BUS_DMA_NOWAIT);
2052 if (__predict_false(err != 0)) {
2053 device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d"
2054 " packet len = %d\n", err, m->m_pkthdr.len);
2055 goto drop;
2057 bus_dmamap_sync(tx->dmat, tx->info[idx].map,
2058 BUS_DMASYNC_PREWRITE);
2059 tx->info[idx].m = m;
2061 #if IFCAP_TSO4
2062 /* TSO is different enough, we handle it in another routine */
2063 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
2064 mxge_encap_tso(ss, m, cnt, ip_off);
2065 return;
2067 #endif
2069 req = tx->req_list;
2070 cksum_offset = 0;
2071 pseudo_hdr_offset = 0;
2072 flags = MXGEFW_FLAGS_NO_TSO;
2074 /* checksum offloading? */
2075 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
2076 /* ensure ip header is in first mbuf, copy
2077 it to a scratch buffer if not */
2078 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
2079 m_copydata(m, 0, ip_off + sizeof (*ip),
2080 ss->scratch);
2081 ip = (struct ip *)(ss->scratch + ip_off);
2082 } else {
2083 ip = (struct ip *)(mtod(m, char *) + ip_off);
2085 cksum_offset = ip_off + (ip->ip_hl << 2);
2086 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
2087 pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
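/*
 * Note: for CSUM_TCP/CSUM_UDP the stack stores the checksum field's
 * offset within the L4 header in csum_data (e.g. offsetof(struct
 * tcphdr, th_sum)), so pseudo_hdr_offset above becomes the absolute
 * offset of the checksum field; the firmware wants it big-endian.
 */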
2088 req->cksum_offset = cksum_offset;
2089 flags |= MXGEFW_FLAGS_CKSUM;
2090 odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
2091 } else {
2092 odd_flag = 0;
2094 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
2095 flags |= MXGEFW_FLAGS_SMALL;
2097 /* convert segments into a request list */
2098 cum_len = 0;
2099 seg = tx->seg_list;
2100 req->flags = MXGEFW_FLAGS_FIRST;
2101 for (i = 0; i < cnt; i++) {
2102 req->addr_low =
2103 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2104 req->addr_high =
2105 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2106 req->length = htobe16(seg->ds_len);
2107 req->cksum_offset = cksum_offset;
2108 if (cksum_offset > seg->ds_len)
2109 cksum_offset -= seg->ds_len;
2110 else
2111 cksum_offset = 0;
2112 req->pseudo_hdr_offset = pseudo_hdr_offset;
2113 req->pad = 0; /* complete solid 16-byte block */
2114 req->rdma_count = 1;
2115 req->flags |= flags | ((cum_len & 1) * odd_flag);
2116 cum_len += seg->ds_len;
2117 seg++;
2118 req++;
2119 req->flags = 0;
2121 req--;
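/*
 * The loop leaves req one past the last filled descriptor (with its
 * flags pre-cleared for the next pass); stepping back makes req point
 * at the final descriptor, in case the runt padding below appends one.
 */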
2122 /* pad runts to 60 bytes */
2123 if (cum_len < 60) {
2124 req++;
2125 req->addr_low =
2126 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
2127 req->addr_high =
2128 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
2129 req->length = htobe16(60 - cum_len);
2130 req->cksum_offset = 0;
2131 req->pseudo_hdr_offset = pseudo_hdr_offset;
2132 req->pad = 0; /* complete solid 16-byte block */
2133 req->rdma_count = 1;
2134 req->flags |= flags | ((cum_len & 1) * odd_flag);
2135 cnt++;
2138 tx->req_list[0].rdma_count = cnt;
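/*
 * Storing the total descriptor count in the first descriptor's
 * rdma_count lets the firmware fetch the whole request in one RDMA
 * (an inference from how cnt is stowed here, not a documented fact).
 */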
2139 #if 0
2140 /* print what the firmware will see */
2141 for (i = 0; i < cnt; i++) {
2142 kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2143 "cso:%d, flags:0x%x, rdma:%d\n",
2144 i, (int)ntohl(tx->req_list[i].addr_high),
2145 (int)ntohl(tx->req_list[i].addr_low),
2146 (int)ntohs(tx->req_list[i].length),
2147 (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
2148 tx->req_list[i].cksum_offset, tx->req_list[i].flags,
2149 tx->req_list[i].rdma_count);
2151 kprintf("--------------\n");
2152 #endif
2153 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
2154 mxge_submit_req(tx, tx->req_list, cnt);
2155 #ifdef IFNET_BUF_RING
2156 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
2157 /* tell the NIC to start polling this slice */
2158 *tx->send_go = 1;
2159 tx->queue_active = 1;
2160 tx->activate++;
2161 wmb();
2163 #endif
2164 return;
2166 drop:
2167 m_freem(m);
2168 ss->oerrors++;
2169 return;
2172 #ifdef IFNET_BUF_RING
2173 static void
2174 mxge_qflush(struct ifnet *ifp)
2176 mxge_softc_t *sc = ifp->if_softc;
2177 mxge_tx_ring_t *tx;
2178 struct mbuf *m;
2179 int slice;
2181 for (slice = 0; slice < sc->num_slices; slice++) {
2182 tx = &sc->ss[slice].tx;
2183 lwkt_serialize_enter(sc->ifp->if_serializer);
2184 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2185 m_freem(m);
2186 lwkt_serialize_exit(sc->ifp->if_serializer);
2188 if_qflush(ifp);
2191 static inline void
2192 mxge_start_locked(struct mxge_slice_state *ss)
2194 mxge_softc_t *sc;
2195 struct mbuf *m;
2196 struct ifnet *ifp;
2197 mxge_tx_ring_t *tx;
2199 sc = ss->sc;
2200 ifp = sc->ifp;
2201 tx = &ss->tx;
2203 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2204 m = drbr_dequeue(ifp, tx->br);
2205 if (m == NULL) {
2206 return;
2208 /* let BPF see it */
2209 BPF_MTAP(ifp, m);
2211 /* give it to the nic */
2212 mxge_encap(ss, m);
2214 /* ran out of transmit slots */
2215 if (((ss->if_flags & IFF_OACTIVE) == 0)
2216 && (!drbr_empty(ifp, tx->br))) {
2217 ss->if_flags |= IFF_OACTIVE;
2218 tx->stall++;
2222 static int
2223 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2225 mxge_softc_t *sc;
2226 struct ifnet *ifp;
2227 mxge_tx_ring_t *tx;
2228 int err;
2230 sc = ss->sc;
2231 ifp = sc->ifp;
2232 tx = &ss->tx;
2234 if ((ss->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
2235 IFF_RUNNING) {
2236 err = drbr_enqueue(ifp, tx->br, m);
2237 return (err);
2240 if (drbr_empty(ifp, tx->br) &&
2241 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2242 /* let BPF see it */
2243 BPF_MTAP(ifp, m);
2244 /* give it to the nic */
2245 mxge_encap(ss, m);
2246 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2247 return (err);
2249 if (!drbr_empty(ifp, tx->br))
2250 mxge_start_locked(ss);
2251 return (0);
2254 static int
2255 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2257 mxge_softc_t *sc = ifp->if_softc;
2258 struct mxge_slice_state *ss;
2259 mxge_tx_ring_t *tx;
2260 int err = 0;
2261 int slice = 0; /* flowid steering is #if 0'd below; fall back to slice 0 */
2263 #if 0
2264 slice = m->m_pkthdr.flowid;
2265 #endif
2266 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2268 ss = &sc->ss[slice];
2269 tx = &ss->tx;
2271 if (lwkt_serialize_try(ifp->if_serializer)) {
2272 err = mxge_transmit_locked(ss, m);
2273 lwkt_serialize_exit(ifp->if_serializer);
2274 } else {
2275 err = drbr_enqueue(ifp, tx->br, m);
2278 return (err);
2281 #else
2283 static inline void
2284 mxge_start_locked(struct mxge_slice_state *ss)
2286 mxge_softc_t *sc;
2287 struct mbuf *m;
2288 struct ifnet *ifp;
2289 mxge_tx_ring_t *tx;
2291 sc = ss->sc;
2292 ifp = sc->ifp;
2293 tx = &ss->tx;
2294 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2295 m = ifq_dequeue(&ifp->if_snd, NULL);
2296 if (m == NULL) {
2297 return;
2299 /* let BPF see it */
2300 BPF_MTAP(ifp, m);
2302 /* give it to the nic */
2303 mxge_encap(ss, m);
2305 /* ran out of transmit slots */
2306 if ((sc->ifp->if_flags & IFF_OACTIVE) == 0) {
2307 sc->ifp->if_flags |= IFF_OACTIVE;
2308 tx->stall++;
2311 #endif
2312 static void
2313 mxge_start(struct ifnet *ifp)
2315 mxge_softc_t *sc = ifp->if_softc;
2316 struct mxge_slice_state *ss;
2318 ASSERT_SERIALIZED(sc->ifp->if_serializer);
2319 /* only use the first slice for now */
2320 ss = &sc->ss[0];
2321 mxge_start_locked(ss);
2325 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2326 * at most 32 bytes at a time, so as to avoid involving the software
2327 * pio handler in the nic. We re-write the first segment's low
2328 * DMA address to mark it valid only after we write the entire chunk
2329 * in a burst
2331 static inline void
2332 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2333 mcp_kreq_ether_recv_t *src)
2335 uint32_t low;
2337 low = src->addr_low;
2338 src->addr_low = 0xffffffff;
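/* poison addr_low so the NIC ignores the chunk until it is restored */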
2339 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2340 wmb();
2341 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2342 wmb();
2343 src->addr_low = low;
2344 dst->addr_low = low;
2345 wmb();
2348 static int
2349 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2351 bus_dma_segment_t seg;
2352 struct mbuf *m;
2353 mxge_rx_ring_t *rx = &ss->rx_small;
2354 int cnt, err;
2356 m = m_gethdr(MB_DONTWAIT, MT_DATA);
2357 if (m == NULL) {
2358 rx->alloc_fail++;
2359 err = ENOBUFS;
2360 goto done;
2362 m->m_len = m->m_pkthdr.len = MHLEN;
2363 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2364 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2365 if (err != 0) {
2366 kprintf("can't dmamap small (%d)\n", err);
2367 m_free(m);
2368 goto done;
2370 rx->info[idx].m = m;
2371 rx->shadow[idx].addr_low =
2372 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2373 rx->shadow[idx].addr_high =
2374 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2376 done:
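/* descriptors are handed to the NIC only in bursts of 8,
   matching the copy size used by mxge_submit_8rx() */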
2377 if ((idx & 7) == 7)
2378 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2379 return err;
2383 static int
2384 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2386 bus_dma_segment_t seg[3];
2387 struct mbuf *m;
2388 mxge_rx_ring_t *rx = &ss->rx_big;
2389 int cnt, err, i;
2391 if (rx->cl_size == MCLBYTES)
2392 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2393 else {
2394 #if 0
2395 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2396 #else
2398 * XXX: allocate normal sized buffers for big buffers.
2399 * We should be fine as long as we don't get any jumbo frames
2401 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2402 #endif
2404 if (m == NULL) {
2405 rx->alloc_fail++;
2406 err = ENOBUFS;
2407 goto done;
2409 m->m_pkthdr.len = 0;
2410 m->m_len = m->m_pkthdr.len = rx->mlen;
2411 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2412 seg, 1, &cnt, BUS_DMA_NOWAIT);
2413 if (err != 0) {
2414 kprintf("can't dmamap big (%d)\n", err);
2415 m_free(m);
2416 goto done;
2418 rx->info[idx].m = m;
2419 rx->shadow[idx].addr_low =
2420 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2421 rx->shadow[idx].addr_high =
2422 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2424 #if MXGE_VIRT_JUMBOS
2425 for (i = 1; i < cnt; i++) {
2426 rx->shadow[idx + i].addr_low =
2427 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2428 rx->shadow[idx + i].addr_high =
2429 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2431 #endif
2433 done:
2434 for (i = 0; i < rx->nbufs; i++) {
2435 if ((idx & 7) == 7) {
2436 mxge_submit_8rx(&rx->lanai[idx - 7],
2437 &rx->shadow[idx - 7]);
2439 idx++;
2441 return err;
2445 * Myri10GE hardware checksums are not valid if the sender
2446 * padded the frame with non-zero padding. This is because
2447 * the firmware just does a simple 16-bit 1s complement
2448 * checksum across the entire frame, excluding the first 14
2449 * bytes. It is best to simply check the checksum and
2450 * tell the stack about it only if the checksum is good
2453 static inline uint16_t
2454 mxge_rx_csum(struct mbuf *m, int csum)
2456 struct ether_header *eh;
2457 struct ip *ip;
2458 uint16_t c;
2460 eh = mtod(m, struct ether_header *);
2462 /* only deal with IPv4 TCP & UDP for now */
2463 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2464 return 1;
2465 ip = (struct ip *)(eh + 1);
2466 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2467 ip->ip_p != IPPROTO_UDP))
2468 return 1;
2469 #ifdef INET
2470 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2471 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2472 (ip->ip_hl << 2) + ip->ip_p));
2473 #else
2474 c = 1;
2475 #endif
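/* a valid TCP/UDP checksum plus its pseudo-header sums to 0xffff
   in ones-complement, so the inversion below yields 0 when good */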
2476 c ^= 0xffff;
2477 return (c);
2480 static void
2481 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2483 struct ether_vlan_header *evl;
2484 struct ether_header *eh;
2485 uint32_t partial;
2487 evl = mtod(m, struct ether_vlan_header *);
2488 eh = mtod(m, struct ether_header *);
2491 * fix checksum by subtracting EVL_ENCAPLEN bytes
2492 * after what the firmware thought was the end of the ethernet
2493 * header.
2496 /* put checksum into host byte order */
2497 *csum = ntohs(*csum);
2498 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2499 (*csum) += ~partial;
2500 (*csum) += ((*csum) < ~partial);
2501 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2502 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
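/* adding ~partial subtracts the 4 VLAN bytes in ones-complement
   arithmetic; the two folds above push carries back into 16 bits */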
2504 /* restore checksum to network byte order;
2505 later consumers expect this */
2506 *csum = htons(*csum);
2508 /* save the tag */
2509 #ifdef MXGE_NEW_VLAN_API
2510 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2511 #else
2513 struct m_tag *mtag;
2514 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2515 MB_DONTWAIT);
2516 if (mtag == NULL)
2517 return;
2518 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2519 m_tag_prepend(m, mtag);
2522 #endif
2523 m->m_flags |= M_VLANTAG;
2526 * Remove the 802.1q header by copying the Ethernet
2527 * addresses over it and adjusting the beginning of
2528 * the data in the mbuf. The encapsulated Ethernet
2529 * type field is already in place.
2531 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2532 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2533 m_adj(m, EVL_ENCAPLEN);
2537 static inline void
2538 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum,
2539 struct mbuf_chain *chain)
2541 mxge_softc_t *sc;
2542 struct ifnet *ifp;
2543 struct mbuf *m;
2544 struct ether_header *eh;
2545 mxge_rx_ring_t *rx;
2546 bus_dmamap_t old_map;
2547 int idx;
2548 uint16_t tcpudp_csum;
2550 sc = ss->sc;
2551 ifp = sc->ifp;
2552 rx = &ss->rx_big;
2553 idx = rx->cnt & rx->mask;
2554 rx->cnt += rx->nbufs;
2555 /* save a pointer to the received mbuf */
2556 m = rx->info[idx].m;
2557 /* try to replace the received mbuf */
2558 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2559 /* drop the frame -- the old mbuf is re-cycled */
2560 ifp->if_ierrors++;
2561 return;
2564 /* unmap the received buffer */
2565 old_map = rx->info[idx].map;
2566 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2567 bus_dmamap_unload(rx->dmat, old_map);
2569 /* swap the bus_dmamap_t's */
2570 rx->info[idx].map = rx->extra_map;
2571 rx->extra_map = old_map;
2573 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2574 * aligned */
2575 m->m_data += MXGEFW_PAD;
2577 m->m_pkthdr.rcvif = ifp;
2578 m->m_len = m->m_pkthdr.len = len;
2579 ss->ipackets++;
2580 eh = mtod(m, struct ether_header *);
2581 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2582 mxge_vlan_tag_remove(m, &csum);
2584 /* if the checksum is valid, mark it in the mbuf header */
2585 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2586 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2587 return;
2588 /* otherwise, it was a UDP frame, or a TCP frame which
2589 we could not do LRO on. Tell the stack that the
2590 checksum is good */
2591 m->m_pkthdr.csum_data = 0xffff;
2592 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2594 #if 0
2595 /* flowid only valid if RSS hashing is enabled */
2596 if (sc->num_slices > 1) {
2597 m->m_pkthdr.flowid = (ss - sc->ss);
2598 m->m_flags |= M_FLOWID;
2600 #endif
2601 ether_input_chain(ifp, m, NULL, chain);
2604 static inline void
2605 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum,
2606 struct mbuf_chain *chain)
2608 mxge_softc_t *sc;
2609 struct ifnet *ifp;
2610 struct ether_header *eh;
2611 struct mbuf *m;
2612 mxge_rx_ring_t *rx;
2613 bus_dmamap_t old_map;
2614 int idx;
2615 uint16_t tcpudp_csum;
2617 sc = ss->sc;
2618 ifp = sc->ifp;
2619 rx = &ss->rx_small;
2620 idx = rx->cnt & rx->mask;
2621 rx->cnt++;
2622 /* save a pointer to the received mbuf */
2623 m = rx->info[idx].m;
2624 /* try to replace the received mbuf */
2625 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2626 /* drop the frame -- the old mbuf is re-cycled */
2627 ifp->if_ierrors++;
2628 return;
2631 /* unmap the received buffer */
2632 old_map = rx->info[idx].map;
2633 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2634 bus_dmamap_unload(rx->dmat, old_map);
2636 /* swap the bus_dmamap_t's */
2637 rx->info[idx].map = rx->extra_map;
2638 rx->extra_map = old_map;
2640 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2641 * aligned */
2642 m->m_data += MXGEFW_PAD;
2644 m->m_pkthdr.rcvif = ifp;
2645 m->m_len = m->m_pkthdr.len = len;
2646 ss->ipackets++;
2647 eh = mtod(m, struct ether_header *);
2648 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2649 mxge_vlan_tag_remove(m, &csum);
2651 /* if the checksum is valid, mark it in the mbuf header */
2652 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2653 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2654 return;
2655 /* otherwise, it was a UDP frame, or a TCP frame which
2656 we could not do LRO on. Tell the stack that the
2657 checksum is good */
2658 m->m_pkthdr.csum_data = 0xffff;
2659 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2661 #if 0
2662 /* flowid only valid if RSS hashing is enabled */
2663 if (sc->num_slices > 1) {
2664 m->m_pkthdr.flowid = (ss - sc->ss);
2665 m->m_flags |= M_FLOWID;
2667 #endif
2668 ether_input_chain(ifp, m, NULL, chain);
2671 static inline void
2672 mxge_clean_rx_done(struct mxge_slice_state *ss)
2674 mxge_rx_done_t *rx_done = &ss->rx_done;
2675 int limit = 0;
2676 uint16_t length;
2677 uint16_t checksum;
2678 struct mbuf_chain chain[MAXCPU];
2680 ether_input_chain_init(chain);
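/* a non-zero length marks a valid completion slot; the host
   clears it below once the entry has been consumed */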
2681 while (rx_done->entry[rx_done->idx].length != 0) {
2682 length = ntohs(rx_done->entry[rx_done->idx].length);
2683 rx_done->entry[rx_done->idx].length = 0;
2684 checksum = rx_done->entry[rx_done->idx].checksum;
2685 if (length <= (MHLEN - MXGEFW_PAD))
2686 mxge_rx_done_small(ss, length, checksum, chain);
2687 else
2688 mxge_rx_done_big(ss, length, checksum, chain);
2689 rx_done->cnt++;
2690 rx_done->idx = rx_done->cnt & rx_done->mask;
2692 /* limit potential for livelock */
2693 if (__predict_false(++limit > rx_done->mask / 2))
2694 break;
2696 ether_input_dispatch(chain);
2697 #ifdef INET
2698 while (!SLIST_EMPTY(&ss->lro_active)) {
2699 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2700 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2701 mxge_lro_flush(ss, lro);
2703 #endif
2707 static inline void
2708 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2710 struct ifnet *ifp;
2711 mxge_tx_ring_t *tx;
2712 struct mbuf *m;
2713 bus_dmamap_t map;
2714 int idx;
2715 int *flags;
2717 tx = &ss->tx;
2718 ifp = ss->sc->ifp;
2719 ASSERT_SERIALIZED(ifp->if_serializer);
2720 while (tx->pkt_done != mcp_idx) {
2721 idx = tx->done & tx->mask;
2722 tx->done++;
2723 m = tx->info[idx].m;
2724 /* mbuf and DMA map only attached to the first
2725 segment per-mbuf */
2726 if (m != NULL) {
2727 ss->obytes += m->m_pkthdr.len;
2728 if (m->m_flags & M_MCAST)
2729 ss->omcasts++;
2730 ss->opackets++;
2731 tx->info[idx].m = NULL;
2732 map = tx->info[idx].map;
2733 bus_dmamap_unload(tx->dmat, map);
2734 m_freem(m);
2736 if (tx->info[idx].flag) {
2737 tx->info[idx].flag = 0;
2738 tx->pkt_done++;
2742 /* If we have space, clear IFF_OACTIVE to tell the stack that
2743 it's OK to send packets */
2744 #ifdef IFNET_BUF_RING
2745 flags = &ss->if_flags;
2746 #else
2747 flags = &ifp->if_flags;
2748 #endif
2749 if ((*flags) & IFF_OACTIVE &&
2750 tx->req - tx->done < (tx->mask + 1)/4) {
2751 *(flags) &= ~IFF_OACTIVE;
2752 ss->tx.wake++;
2753 mxge_start_locked(ss);
2755 #ifdef IFNET_BUF_RING
2756 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2757 /* let the NIC stop polling this queue, since there
2758 * are no more transmits pending */
2759 if (tx->req == tx->done) {
2760 *tx->send_stop = 1;
2761 tx->queue_active = 0;
2762 tx->deactivate++;
2763 wmb();
2766 #endif
2770 static struct mxge_media_type mxge_xfp_media_types[] =
2772 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2773 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2774 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2775 {0, (1 << 5), "10GBASE-ER"},
2776 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2777 {0, (1 << 3), "10GBASE-SW"},
2778 {0, (1 << 2), "10GBASE-LW"},
2779 {0, (1 << 1), "10GBASE-EW"},
2780 {0, (1 << 0), "Reserved"}
2782 static struct mxge_media_type mxge_sfp_media_types[] =
2784 {0, (1 << 7), "Reserved"},
2785 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2786 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2787 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2790 static void
2791 mxge_set_media(mxge_softc_t *sc, int type)
2793 sc->media_flags |= type;
2794 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2795 ifmedia_set(&sc->media, sc->media_flags);
2800 * Determine the media type for a NIC. Some XFPs will identify
2801 * themselves only when their link is up, so this is initiated via a
2802 * link up interrupt. However, this can potentially take up to
2803 * several milliseconds, so it is run via the watchdog routine, rather
2804 * than in the interrupt handler itself. This need only be done
2805 * once, not each time the link is up.
2807 static void
2808 mxge_media_probe(mxge_softc_t *sc)
2810 mxge_cmd_t cmd;
2811 char *cage_type;
2812 char *ptr;
2813 struct mxge_media_type *mxge_media_types = NULL;
2814 int i, err, ms, mxge_media_type_entries;
2815 uint32_t byte;
2817 sc->need_media_probe = 0;
2819 /* if we've already set a media type, we're done */
2820 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2821 return;
2824 * parse the product code to determine the interface type
2825 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2826 * after the 3rd dash in the driver's cached copy of the
2827 * EEPROM's product code string.
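 * For example, a part number of the form "10G-PCIE-8A-R" would
 * yield 'R' (XFP) here; the exact codes are illustrative only.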
2829 ptr = sc->product_code_string;
2830 if (ptr == NULL) {
2831 device_printf(sc->dev, "Missing product code\n");
2834 for (i = 0; i < 3; i++, ptr++) {
2835 ptr = index(ptr, '-');
2836 if (ptr == NULL) {
2837 device_printf(sc->dev,
2838 "only %d dashes in PC?!?\n", i);
2839 return;
2842 if (*ptr == 'C') {
2843 /* -C is CX4 */
2844 mxge_set_media(sc, IFM_10G_CX4);
2845 return;
2847 else if (*ptr == 'Q') {
2848 /* -Q is Quad Ribbon Fiber */
2849 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2850 /* FreeBSD has no media type for Quad ribbon fiber */
2851 return;
2854 if (*ptr == 'R') {
2855 /* -R is XFP */
2856 mxge_media_types = mxge_xfp_media_types;
2857 mxge_media_type_entries =
2858 sizeof (mxge_xfp_media_types) /
2859 sizeof (mxge_xfp_media_types[0]);
2860 byte = MXGE_XFP_COMPLIANCE_BYTE;
2861 cage_type = "XFP";
2864 if (*ptr == 'S' || *(ptr + 1) == 'S') {
2865 /* -S or -2S is SFP+ */
2866 mxge_media_types = mxge_sfp_media_types;
2867 mxge_media_type_entries =
2868 sizeof (mxge_sfp_media_types) /
2869 sizeof (mxge_sfp_media_types[0]);
2870 cage_type = "SFP+";
2871 byte = 3;
2874 if (mxge_media_types == NULL) {
2875 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2876 return;
2880 * At this point we know the NIC has an XFP or SFP+ cage, so now we
2881 * try to determine what is in the cage by using the
2882 * firmware's I2C commands to read the module's 10GbE compliance
2883 * register. We read just one byte, which may take over
2884 * a millisecond
2887 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2888 cmd.data1 = byte;
2889 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2890 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2891 device_printf(sc->dev, "failed to read XFP\n");
2893 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2894 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2896 if (err != MXGEFW_CMD_OK) {
2897 return;
2900 /* now we wait for the data to be cached */
2901 cmd.data0 = byte;
2902 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2903 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2904 DELAY(1000);
2905 cmd.data0 = byte;
2906 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2908 if (err != MXGEFW_CMD_OK) {
2909 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2910 cage_type, err, ms);
2911 return;
2914 if (cmd.data0 == mxge_media_types[0].bitmask) {
2915 if (mxge_verbose)
2916 device_printf(sc->dev, "%s:%s\n", cage_type,
2917 mxge_media_types[0].name);
2918 mxge_set_media(sc, IFM_10G_CX4);
2919 return;
2921 for (i = 1; i < mxge_media_type_entries; i++) {
2922 if (cmd.data0 & mxge_media_types[i].bitmask) {
2923 if (mxge_verbose)
2924 device_printf(sc->dev, "%s:%s\n",
2925 cage_type,
2926 mxge_media_types[i].name);
2928 mxge_set_media(sc, mxge_media_types[i].flag);
2929 return;
2932 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2933 cmd.data0);
2935 return;
2938 static void
2939 mxge_intr(void *arg)
2941 struct mxge_slice_state *ss = arg;
2942 mxge_softc_t *sc = ss->sc;
2943 mcp_irq_data_t *stats = ss->fw_stats;
2944 mxge_tx_ring_t *tx = &ss->tx;
2945 mxge_rx_done_t *rx_done = &ss->rx_done;
2946 uint32_t send_done_count;
2947 uint8_t valid;
2950 #ifndef IFNET_BUF_RING
2951 /* an interrupt on a non-zero slice is implicitly valid
2952 since MSI-X irqs are not shared */
2953 if (ss != sc->ss) {
2954 mxge_clean_rx_done(ss);
2955 *ss->irq_claim = be32toh(3);
2956 return;
2958 #endif
2960 /* make sure the DMA has finished */
2961 if (!stats->valid) {
2962 return;
2964 valid = stats->valid;
2966 if (sc->legacy_irq) {
2967 /* lower legacy IRQ */
2968 *sc->irq_deassert = 0;
2969 if (!mxge_deassert_wait)
2970 /* don't wait for confirmation that the irq is low */
2971 stats->valid = 0;
2972 } else {
2973 stats->valid = 0;
2976 /* loop while waiting for legacy irq deassertion */
2977 do {
2978 /* check for transmit completes and receives */
2979 send_done_count = be32toh(stats->send_done_count);
2980 while ((send_done_count != tx->pkt_done) ||
2981 (rx_done->entry[rx_done->idx].length != 0)) {
2982 if (send_done_count != tx->pkt_done)
2983 mxge_tx_done(ss, (int)send_done_count);
2984 mxge_clean_rx_done(ss);
2985 send_done_count = be32toh(stats->send_done_count);
2987 if (sc->legacy_irq && mxge_deassert_wait)
2988 wmb();
2989 } while (*((volatile uint8_t *) &stats->valid));
2991 /* fw link & error stats meaningful only on the first slice */
2992 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2993 if (sc->link_state != stats->link_up) {
2994 sc->link_state = stats->link_up;
2995 if (sc->link_state) {
2996 sc->ifp->if_link_state = LINK_STATE_UP;
2997 if_link_state_change(sc->ifp);
2998 if (mxge_verbose)
2999 device_printf(sc->dev, "link up\n");
3000 } else {
3001 sc->ifp->if_link_state = LINK_STATE_DOWN;
3002 if_link_state_change(sc->ifp);
3003 if (mxge_verbose)
3004 device_printf(sc->dev, "link down\n");
3006 sc->need_media_probe = 1;
3008 if (sc->rdma_tags_available !=
3009 be32toh(stats->rdma_tags_available)) {
3010 sc->rdma_tags_available =
3011 be32toh(stats->rdma_tags_available);
3012 device_printf(sc->dev, "RDMA timed out! %d tags "
3013 "left\n", sc->rdma_tags_available);
3016 if (stats->link_down) {
3017 sc->down_cnt += stats->link_down;
3018 sc->link_state = 0;
3019 sc->ifp->if_link_state = LINK_STATE_DOWN;
3020 if_link_state_change(sc->ifp);
3024 /* check to see if we have rx token to pass back */
3025 if (valid & 0x1)
3026 *ss->irq_claim = be32toh(3);
3027 *(ss->irq_claim + 1) = be32toh(3);
3030 static void
3031 mxge_init(void *arg)
3037 static void
3038 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3040 struct lro_entry *lro_entry;
3041 int i;
3043 while (!SLIST_EMPTY(&ss->lro_free)) {
3044 lro_entry = SLIST_FIRST(&ss->lro_free);
3045 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3046 kfree(lro_entry, M_DEVBUF);
3049 for (i = 0; i <= ss->rx_big.mask; i++) {
3050 if (ss->rx_big.info[i].m == NULL)
3051 continue;
3052 bus_dmamap_unload(ss->rx_big.dmat,
3053 ss->rx_big.info[i].map);
3054 m_freem(ss->rx_big.info[i].m);
3055 ss->rx_big.info[i].m = NULL;
3058 for (i = 0; i <= ss->rx_small.mask; i++) {
3059 if (ss->rx_small.info[i].m == NULL)
3060 continue;
3061 bus_dmamap_unload(ss->rx_small.dmat,
3062 ss->rx_small.info[i].map);
3063 m_freem(ss->rx_small.info[i].m);
3064 ss->rx_small.info[i].m = NULL;
3067 /* transmit ring used only on the first slice */
3068 if (ss->tx.info == NULL)
3069 return;
3071 for (i = 0; i <= ss->tx.mask; i++) {
3072 ss->tx.info[i].flag = 0;
3073 if (ss->tx.info[i].m == NULL)
3074 continue;
3075 bus_dmamap_unload(ss->tx.dmat,
3076 ss->tx.info[i].map);
3077 m_freem(ss->tx.info[i].m);
3078 ss->tx.info[i].m = NULL;
3082 static void
3083 mxge_free_mbufs(mxge_softc_t *sc)
3085 int slice;
3087 for (slice = 0; slice < sc->num_slices; slice++)
3088 mxge_free_slice_mbufs(&sc->ss[slice]);
3091 static void
3092 mxge_free_slice_rings(struct mxge_slice_state *ss)
3094 int i;
3097 if (ss->rx_done.entry != NULL)
3098 mxge_dma_free(&ss->rx_done.dma);
3099 ss->rx_done.entry = NULL;
3101 if (ss->tx.req_bytes != NULL)
3102 kfree(ss->tx.req_bytes, M_DEVBUF);
3103 ss->tx.req_bytes = NULL;
3105 if (ss->tx.seg_list != NULL)
3106 kfree(ss->tx.seg_list, M_DEVBUF);
3107 ss->tx.seg_list = NULL;
3109 if (ss->rx_small.shadow != NULL)
3110 kfree(ss->rx_small.shadow, M_DEVBUF);
3111 ss->rx_small.shadow = NULL;
3113 if (ss->rx_big.shadow != NULL)
3114 kfree(ss->rx_big.shadow, M_DEVBUF);
3115 ss->rx_big.shadow = NULL;
3117 if (ss->tx.info != NULL) {
3118 if (ss->tx.dmat != NULL) {
3119 for (i = 0; i <= ss->tx.mask; i++) {
3120 bus_dmamap_destroy(ss->tx.dmat,
3121 ss->tx.info[i].map);
3123 bus_dma_tag_destroy(ss->tx.dmat);
3125 kfree(ss->tx.info, M_DEVBUF);
3127 ss->tx.info = NULL;
3129 if (ss->rx_small.info != NULL) {
3130 if (ss->rx_small.dmat != NULL) {
3131 for (i = 0; i <= ss->rx_small.mask; i++) {
3132 bus_dmamap_destroy(ss->rx_small.dmat,
3133 ss->rx_small.info[i].map);
3135 bus_dmamap_destroy(ss->rx_small.dmat,
3136 ss->rx_small.extra_map);
3137 bus_dma_tag_destroy(ss->rx_small.dmat);
3139 kfree(ss->rx_small.info, M_DEVBUF);
3141 ss->rx_small.info = NULL;
3143 if (ss->rx_big.info != NULL) {
3144 if (ss->rx_big.dmat != NULL) {
3145 for (i = 0; i <= ss->rx_big.mask; i++) {
3146 bus_dmamap_destroy(ss->rx_big.dmat,
3147 ss->rx_big.info[i].map);
3149 bus_dmamap_destroy(ss->rx_big.dmat,
3150 ss->rx_big.extra_map);
3151 bus_dma_tag_destroy(ss->rx_big.dmat);
3153 kfree(ss->rx_big.info, M_DEVBUF);
3155 ss->rx_big.info = NULL;
3158 static void
3159 mxge_free_rings(mxge_softc_t *sc)
3161 int slice;
3163 for (slice = 0; slice < sc->num_slices; slice++)
3164 mxge_free_slice_rings(&sc->ss[slice]);
3167 static int
3168 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3169 int tx_ring_entries)
3171 mxge_softc_t *sc = ss->sc;
3172 size_t bytes;
3173 int err, i;
3175 err = ENOMEM;
3177 /* allocate per-slice receive resources */
3179 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3180 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3182 /* allocate the rx shadow rings */
3183 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3184 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3185 if (ss->rx_small.shadow == NULL)
3186 return err;
3188 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3189 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3190 if (ss->rx_big.shadow == NULL)
3191 return err;
3193 /* allocate the rx host info rings */
3194 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3195 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3196 if (ss->rx_small.info == NULL)
3197 return err;
3199 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3200 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3201 if (ss->rx_big.info == NULL)
3202 return err;
3204 /* allocate the rx busdma resources */
3205 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3206 1, /* alignment */
3207 4096, /* boundary */
3208 BUS_SPACE_MAXADDR, /* low */
3209 BUS_SPACE_MAXADDR, /* high */
3210 NULL, NULL, /* filter */
3211 MHLEN, /* maxsize */
3212 1, /* num segs */
3213 MHLEN, /* maxsegsize */
3214 BUS_DMA_ALLOCNOW, /* flags */
3215 &ss->rx_small.dmat); /* tag */
3216 if (err != 0) {
3217 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3218 err);
3219 return err;
3222 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3223 1, /* alignment */
3224 #if MXGE_VIRT_JUMBOS
3225 4096, /* boundary */
3226 #else
3227 0, /* boundary */
3228 #endif
3229 BUS_SPACE_MAXADDR, /* low */
3230 BUS_SPACE_MAXADDR, /* high */
3231 NULL, NULL, /* filter */
3232 3*4096, /* maxsize */
3233 #if MXGE_VIRT_JUMBOS
3234 3, /* num segs */
3235 4096, /* maxsegsize*/
3236 #else
3237 1, /* num segs */
3238 MJUM9BYTES, /* maxsegsize*/
3239 #endif
3240 BUS_DMA_ALLOCNOW, /* flags */
3241 &ss->rx_big.dmat); /* tag */
3242 if (err != 0) {
3243 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3244 err);
3245 return err;
3247 for (i = 0; i <= ss->rx_small.mask; i++) {
3248 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3249 &ss->rx_small.info[i].map);
3250 if (err != 0) {
3251 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3252 err);
3253 return err;
3256 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3257 &ss->rx_small.extra_map);
3258 if (err != 0) {
3259 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3260 err);
3261 return err;
3264 for (i = 0; i <= ss->rx_big.mask; i++) {
3265 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3266 &ss->rx_big.info[i].map);
3267 if (err != 0) {
3268 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3269 err);
3270 return err;
3273 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3274 &ss->rx_big.extra_map);
3275 if (err != 0) {
3276 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3277 err);
3278 return err;
3281 /* now allocate TX resources */
3283 #ifndef IFNET_BUF_RING
3284 /* only use a single TX ring for now */
3285 if (ss != ss->sc->ss)
3286 return 0;
3287 #endif
3289 ss->tx.mask = tx_ring_entries - 1;
3290 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3293 /* allocate the tx request copy block */
3294 bytes = 8 +
3295 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3296 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3297 if (ss->tx.req_bytes == NULL)
3298 return err;
3299 /* ensure req_list entries are aligned to 8 bytes */
3300 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3301 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
3303 /* allocate the tx busdma segment list */
3304 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3305 ss->tx.seg_list = (bus_dma_segment_t *)
3306 kmalloc(bytes, M_DEVBUF, M_WAITOK);
3307 if (ss->tx.seg_list == NULL)
3308 return err;
3310 /* allocate the tx host info ring */
3311 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3312 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3313 if (ss->tx.info == NULL)
3314 return err;
3316 /* allocate the tx busdma resources */
3317 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3318 1, /* alignment */
3319 sc->tx_boundary, /* boundary */
3320 BUS_SPACE_MAXADDR, /* low */
3321 BUS_SPACE_MAXADDR, /* high */
3322 NULL, NULL, /* filter */
3323 65536 + 256, /* maxsize */
3324 ss->tx.max_desc - 2, /* num segs */
3325 sc->tx_boundary, /* maxsegsz */
3326 BUS_DMA_ALLOCNOW, /* flags */
3327 &ss->tx.dmat); /* tag */
3329 if (err != 0) {
3330 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3331 err);
3332 return err;
3335 /* now use these tags to setup dmamaps for each slot
3336 in the ring */
3337 for (i = 0; i <= ss->tx.mask; i++) {
3338 err = bus_dmamap_create(ss->tx.dmat, 0,
3339 &ss->tx.info[i].map);
3340 if (err != 0) {
3341 device_printf(sc->dev, "Err %d tx dmamap\n",
3342 err);
3343 return err;
3346 return 0;
3350 static int
3351 mxge_alloc_rings(mxge_softc_t *sc)
3353 mxge_cmd_t cmd;
3354 int tx_ring_size;
3355 int tx_ring_entries, rx_ring_entries;
3356 int err, slice;
3358 /* get ring sizes */
3359 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3360 tx_ring_size = cmd.data0;
3361 if (err != 0) {
3362 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3363 goto abort;
3366 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3367 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3368 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3369 ifq_set_ready(&sc->ifp->if_snd);
3371 for (slice = 0; slice < sc->num_slices; slice++) {
3372 err = mxge_alloc_slice_rings(&sc->ss[slice],
3373 rx_ring_entries,
3374 tx_ring_entries);
3375 if (err != 0)
3376 goto abort;
3378 return 0;
3380 abort:
3381 mxge_free_rings(sc);
3382 return err;
3387 static void
3388 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3390 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3392 if (bufsize < MCLBYTES) {
3393 /* easy, everything fits in a single buffer */
3394 *big_buf_size = MCLBYTES;
3395 *cl_size = MCLBYTES;
3396 *nbufs = 1;
3397 return;
3400 if (bufsize < MJUMPAGESIZE) {
3401 /* still easy, everything still fits in a single buffer */
3402 *big_buf_size = MJUMPAGESIZE;
3403 *cl_size = MJUMPAGESIZE;
3404 *nbufs = 1;
3405 return;
3407 #if MXGE_VIRT_JUMBOS
3408 /* now we need to use virtually contiguous buffers */
3409 *cl_size = MJUM9BYTES;
3410 *big_buf_size = 4096;
3411 *nbufs = mtu / 4096 + 1;
3412 /* needs to be a power of two, so round up */
3413 if (*nbufs == 3)
3414 *nbufs = 4;
3415 #else
3416 *cl_size = MJUM9BYTES;
3417 *big_buf_size = MJUM9BYTES;
3418 *nbufs = 1;
3419 #endif
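/*
 * Worked example for the non-MXGE_VIRT_JUMBOS case (assuming a 4KB
 * MJUMPAGESIZE): a 9000-byte MTU gives bufsize = 9000 + 14 + 4 + 2 =
 * 9020, too big for MCLBYTES or MJUMPAGESIZE, so one 9KB MJUM9BYTES
 * cluster holds each frame.
 */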
3422 static int
3423 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3425 mxge_softc_t *sc;
3426 mxge_cmd_t cmd;
3427 bus_dmamap_t map;
3428 struct lro_entry *lro_entry;
3429 int err, i, slice;
3432 sc = ss->sc;
3433 slice = ss - sc->ss;
3435 SLIST_INIT(&ss->lro_free);
3436 SLIST_INIT(&ss->lro_active);
3438 for (i = 0; i < sc->lro_cnt; i++) {
3439 lro_entry = (struct lro_entry *)
3440 kmalloc(sizeof (*lro_entry), M_DEVBUF,
3441 M_NOWAIT | M_ZERO);
3442 if (lro_entry == NULL) {
3443 sc->lro_cnt = i;
3444 break;
3446 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3448 /* get the lanai pointers to the send and receive rings */
3450 err = 0;
3451 #ifndef IFNET_BUF_RING
3452 /* We currently only send from the first slice */
3453 if (slice == 0) {
3454 #endif
3455 cmd.data0 = slice;
3456 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3457 ss->tx.lanai =
3458 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3459 ss->tx.send_go = (volatile uint32_t *)
3460 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3461 ss->tx.send_stop = (volatile uint32_t *)
3462 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3463 #ifndef IFNET_BUF_RING
3465 #endif
3466 cmd.data0 = slice;
3467 err |= mxge_send_cmd(sc,
3468 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3469 ss->rx_small.lanai =
3470 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3471 cmd.data0 = slice;
3472 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3473 ss->rx_big.lanai =
3474 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3476 if (err != 0) {
3477 device_printf(sc->dev,
3478 "failed to get ring sizes or locations\n");
3479 return EIO;
3482 /* stock receive rings */
3483 for (i = 0; i <= ss->rx_small.mask; i++) {
3484 map = ss->rx_small.info[i].map;
3485 err = mxge_get_buf_small(ss, map, i);
3486 if (err) {
3487 device_printf(sc->dev, "alloced %d/%d smalls\n",
3488 i, ss->rx_small.mask + 1);
3489 return ENOMEM;
3492 for (i = 0; i <= ss->rx_big.mask; i++) {
3493 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3494 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3496 ss->rx_big.nbufs = nbufs;
3497 ss->rx_big.cl_size = cl_size;
3498 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3499 EVL_ENCAPLEN + MXGEFW_PAD;
3500 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3501 map = ss->rx_big.info[i].map;
3502 err = mxge_get_buf_big(ss, map, i);
3503 if (err) {
3504 device_printf(sc->dev, "alloced %d/%d bigs\n",
3505 i, ss->rx_big.mask + 1);
3506 return ENOMEM;
3509 return 0;
3512 static int
3513 mxge_open(mxge_softc_t *sc)
3515 mxge_cmd_t cmd;
3516 int err, big_bytes, nbufs, slice, cl_size, i;
3517 bus_addr_t bus;
3518 volatile uint8_t *itable;
3519 struct mxge_slice_state *ss;
3521 ASSERT_SERIALIZED(sc->ifp->if_serializer);
3522 /* Copy the MAC address in case it was overridden */
3523 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3525 err = mxge_reset(sc, 1);
3526 if (err != 0) {
3527 device_printf(sc->dev, "failed to reset\n");
3528 return EIO;
3531 if (sc->num_slices > 1) {
3532 /* setup the indirection table */
3533 cmd.data0 = sc->num_slices;
3534 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3535 &cmd);
3537 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3538 &cmd);
3539 if (err != 0) {
3540 device_printf(sc->dev,
3541 "failed to setup rss tables\n");
3542 return err;
3545 /* just enable an identity mapping */
3546 itable = sc->sram + cmd.data0;
3547 for (i = 0; i < sc->num_slices; i++)
3548 itable[i] = (uint8_t)i;
3550 cmd.data0 = 1;
3551 cmd.data1 = mxge_rss_hash_type;
3552 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3553 if (err != 0) {
3554 device_printf(sc->dev, "failed to enable slices\n");
3555 return err;
3560 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3562 cmd.data0 = nbufs;
3563 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3564 &cmd);
3565 /* error is only meaningful if we're trying to set
3566 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3567 if (err && nbufs > 1) {
3568 device_printf(sc->dev,
3569 "Failed to set alway-use-n to %d\n",
3570 nbufs);
3571 return EIO;
3573 /* Give the firmware the mtu and the big and small buffer
3574 sizes. The firmware wants the big buf size to be a power
3575 of two. Luckily, FreeBSD's clusters are powers of two */
3576 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3577 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3578 cmd.data0 = MHLEN - MXGEFW_PAD;
3579 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3580 &cmd);
3581 cmd.data0 = big_bytes;
3582 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3584 if (err != 0) {
3585 device_printf(sc->dev, "failed to setup params\n");
3586 goto abort;
3589 /* Now give him the pointer to the stats block */
3590 for (slice = 0;
3591 #ifdef IFNET_BUF_RING
3592 slice < sc->num_slices;
3593 #else
3594 slice < 1;
3595 #endif
3596 slice++) {
3597 ss = &sc->ss[slice];
3598 cmd.data0 =
3599 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3600 cmd.data1 =
3601 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3602 cmd.data2 = sizeof(struct mcp_irq_data);
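/* the slice number rides in the upper 16 bits of data2 */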
3603 cmd.data2 |= (slice << 16);
3604 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3607 if (err != 0) {
3608 bus = sc->ss->fw_stats_dma.bus_addr;
3609 bus += offsetof(struct mcp_irq_data, send_done_count);
3610 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3611 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3612 err = mxge_send_cmd(sc,
3613 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3614 &cmd);
3615 /* Firmware cannot support multicast without STATS_DMA_V2 */
3616 sc->fw_multicast_support = 0;
3617 } else {
3618 sc->fw_multicast_support = 1;
3621 if (err != 0) {
3622 device_printf(sc->dev, "failed to setup params\n");
3623 goto abort;
3626 for (slice = 0; slice < sc->num_slices; slice++) {
3627 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3628 if (err != 0) {
3629 device_printf(sc->dev, "couldn't open slice %d\n",
3630 slice);
3631 goto abort;
3635 /* Finally, start the firmware running */
3636 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3637 if (err) {
3638 device_printf(sc->dev, "Couldn't bring up link\n");
3639 goto abort;
3641 #ifdef IFNET_BUF_RING
3642 for (slice = 0; slice < sc->num_slices; slice++) {
3643 ss = &sc->ss[slice];
3644 ss->if_flags |= IFF_RUNNING;
3645 ss->if_flags &= ~IFF_OACTIVE;
3647 #endif
3648 sc->ifp->if_flags |= IFF_RUNNING;
3649 sc->ifp->if_flags &= ~IFF_OACTIVE;
3650 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3652 return 0;
3655 abort:
3656 mxge_free_mbufs(sc);
3658 return err;
3661 static int
3662 mxge_close(mxge_softc_t *sc)
3664 mxge_cmd_t cmd;
3665 int err, old_down_cnt;
3666 #ifdef IFNET_BUF_RING
3667 struct mxge_slice_state *ss;
3668 int slice;
3669 #endif
3671 ASSERT_SERIALIZED(sc->ifp->if_serializer);
3672 callout_stop(&sc->co_hdl);
3673 #ifdef IFNET_BUF_RING
3674 for (slice = 0; slice < sc->num_slices; slice++) {
3675 ss = &sc->ss[slice];
3676 ss->if_flags &= ~IFF_RUNNING;
3678 #endif
3679 sc->ifp->if_flags &= ~IFF_RUNNING;
3680 old_down_cnt = sc->down_cnt;
3681 wmb();
3682 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3683 if (err) {
3684 device_printf(sc->dev, "Couldn't bring down link\n");
3686 if (old_down_cnt == sc->down_cnt) {
3687 /* wait for down irq */
3688 DELAY(10 * sc->intr_coal_delay);
3690 wmb();
3691 if (old_down_cnt == sc->down_cnt) {
3692 device_printf(sc->dev, "never got down irq\n");
3695 mxge_free_mbufs(sc);
3697 return 0;
3700 static void
3701 mxge_setup_cfg_space(mxge_softc_t *sc)
3703 device_t dev = sc->dev;
3704 int reg;
3705 uint16_t cmd, lnk, pectl;
3707 /* find the PCIe link width and set max read request to 4KB */
3708 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3709 lnk = pci_read_config(dev, reg + 0x12, 2);
3710 sc->link_width = (lnk >> 4) & 0x3f;
3712 pectl = pci_read_config(dev, reg + 0x8, 2);
3713 pectl = (pectl & ~0x7000) | (5 << 12);
3714 pci_write_config(dev, reg + 0x8, pectl, 2);
3717 /* Enable DMA and Memory space access */
3718 pci_enable_busmaster(dev);
3719 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3720 cmd |= PCIM_CMD_MEMEN;
3721 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3724 static uint32_t
3725 mxge_read_reboot(mxge_softc_t *sc)
3727 device_t dev = sc->dev;
3728 uint32_t vs;
3730 /* find the vendor specific offset */
3731 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3732 device_printf(sc->dev,
3733 "could not find vendor specific offset\n");
3734 return (uint32_t)-1;
3736 /* enable read32 mode */
3737 pci_write_config(dev, vs + 0x10, 0x3, 1);
3738 /* tell NIC which register to read */
3739 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3740 return (pci_read_config(dev, vs + 0x14, 4));
3743 static int
3744 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3746 struct pci_devinfo *dinfo;
3747 mxge_tx_ring_t *tx;
3748 int err;
3749 uint32_t reboot;
3750 uint16_t cmd;
3752 err = ENXIO;
3754 device_printf(sc->dev, "Watchdog reset!\n");
3757 * check to see if the NIC rebooted. If it did, then all of
3758 * PCI config space has been reset, and things like the
3759 * busmaster bit will be zero. If this is the case, then we
3760 * must restore PCI config space before the NIC can be used
3761 * again
3763 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3764 if (cmd == 0xffff) {
3766 * maybe the watchdog caught the NIC rebooting; wait
3767 * up to 100ms for it to finish. If it does not come
3768 * back, then give up
3770 DELAY(1000*100);
3771 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3772 if (cmd == 0xffff) {
3773 device_printf(sc->dev, "NIC disappeared!\n");
3774 return (err);
3777 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3778 /* print the reboot status */
3779 reboot = mxge_read_reboot(sc);
3780 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3781 reboot);
3782 /* restore PCI configuration space */
3783 dinfo = device_get_ivars(sc->dev);
3784 pci_cfg_restore(sc->dev, dinfo);
3786 /* and redo any changes we made to our config space */
3787 mxge_setup_cfg_space(sc);
3789 if (sc->ifp->if_flags & IFF_RUNNING) {
3790 mxge_close(sc);
3791 err = mxge_open(sc);
3793 } else {
3794 tx = &sc->ss[slice].tx;
3795 device_printf(sc->dev,
3796 "NIC did not reboot, slice %d ring state:\n",
3797 slice);
3798 device_printf(sc->dev,
3799 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3800 tx->req, tx->done, tx->queue_active);
3801 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3802 tx->activate, tx->deactivate);
3803 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3804 tx->pkt_done,
3805 be32toh(sc->ss->fw_stats->send_done_count));
3806 device_printf(sc->dev, "not resetting\n");
3808 return (err);
3811 static int
3812 mxge_watchdog(mxge_softc_t *sc)
3814 mxge_tx_ring_t *tx;
3815 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3816 int i, err = 0;
3818 /* see if we have outstanding transmits, which
3819 have been pending for more than mxge_ticks */
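/* a slice is considered stuck when its ring is non-empty
   (req != done), it saw activity during the previous interval
   (watchdog_req != watchdog_done), yet nothing has completed
   since the last tick (done == watchdog_done) */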
3820 for (i = 0;
3821 #ifdef IFNET_BUF_RING
3822 (i < sc->num_slices) && (err == 0);
3823 #else
3824 (i < 1) && (err == 0);
3825 #endif
3826 i++) {
3827 tx = &sc->ss[i].tx;
3828 if (tx->req != tx->done &&
3829 tx->watchdog_req != tx->watchdog_done &&
3830 tx->done == tx->watchdog_done) {
3831 /* check for pause blocking before resetting */
3832 if (tx->watchdog_rx_pause == rx_pause)
3833 err = mxge_watchdog_reset(sc, i);
3834 else
3835 device_printf(sc->dev, "Flow control blocking "
3836 "xmits, check link partner\n");
3839 tx->watchdog_req = tx->req;
3840 tx->watchdog_done = tx->done;
3841 tx->watchdog_rx_pause = rx_pause;
3844 if (sc->need_media_probe)
3845 mxge_media_probe(sc);
3846 return (err);
3849 static void
3850 mxge_update_stats(mxge_softc_t *sc)
3852 struct mxge_slice_state *ss;
3853 u_long ipackets = 0;
3854 u_long opackets = 0;
3855 #ifdef IFNET_BUF_RING
3856 u_long obytes = 0;
3857 u_long omcasts = 0;
3858 u_long odrops = 0;
3859 #endif
3860 u_long oerrors = 0;
3861 int slice;
3863 for (slice = 0; slice < sc->num_slices; slice++) {
3864 ss = &sc->ss[slice];
3865 ipackets += ss->ipackets;
3866 opackets += ss->opackets;
3867 #ifdef IFNET_BUF_RING
3868 obytes += ss->obytes;
3869 omcasts += ss->omcasts;
3870 odrops += ss->tx.br->br_drops;
3871 #endif
3872 oerrors += ss->oerrors;
3874 sc->ifp->if_ipackets = ipackets;
3875 sc->ifp->if_opackets = opackets;
3876 #ifdef IFNET_BUF_RING
3877 sc->ifp->if_obytes = obytes;
3878 sc->ifp->if_omcasts = omcasts;
3879 sc->ifp->if_snd.ifq_drops = odrops;
3880 #endif
3881 sc->ifp->if_oerrors = oerrors;
3884 static void
3885 mxge_tick(void *arg)
3887 mxge_softc_t *sc = arg;
3888 int err = 0;
3890 lwkt_serialize_enter(sc->ifp->if_serializer);
3891 /* aggregate stats from different slices */
3892 mxge_update_stats(sc);
3893 if (!sc->watchdog_countdown) {
3894 err = mxge_watchdog(sc);
3895 sc->watchdog_countdown = 4;
3897 sc->watchdog_countdown--;
3898 if (err == 0)
3899 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3900 lwkt_serialize_exit(sc->ifp->if_serializer);
3903 static int
3904 mxge_media_change(struct ifnet *ifp)
3906 return EINVAL;
3909 static int
3910 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3912 struct ifnet *ifp = sc->ifp;
3913 int real_mtu, old_mtu;
3914 int err = 0;
3916 if (ifp->if_serializer)
3917 ASSERT_SERIALIZED(ifp->if_serializer);
3919 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3920 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3921 return EINVAL;
3922 old_mtu = ifp->if_mtu;
3923 ifp->if_mtu = mtu;
3924 if (ifp->if_flags & IFF_RUNNING) {
3925 mxge_close(sc);
3926 err = mxge_open(sc);
3927 if (err != 0) {
3928 ifp->if_mtu = old_mtu;
3929 mxge_close(sc);
3930 (void) mxge_open(sc);
3933 return err;
3936 static void
3937 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3939 mxge_softc_t *sc = ifp->if_softc;
3942 if (sc == NULL)
3943 return;
3944 ifmr->ifm_status = IFM_AVALID;
3945 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3946 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3947 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3950 static int
3951 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3953 mxge_softc_t *sc = ifp->if_softc;
3954 struct ifreq *ifr = (struct ifreq *)data;
3955 int err, mask;
3957 (void)cr;
3958 err = 0;
3959 ASSERT_SERIALIZED(ifp->if_serializer);
3960 switch (command) {
3961 case SIOCSIFADDR:
3962 case SIOCGIFADDR:
3963 err = ether_ioctl(ifp, command, data);
3964 break;
3966 case SIOCSIFMTU:
3967 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3968 break;
3970 case SIOCSIFFLAGS:
3971 if (sc->dying) {
3972 return EINVAL;
3974 if (ifp->if_flags & IFF_UP) {
3975 if (!(ifp->if_flags & IFF_RUNNING)) {
3976 err = mxge_open(sc);
3977 } else {
3978 /* take care of promisc and allmulti
3979 flag changes */
3980 mxge_change_promisc(sc,
3981 ifp->if_flags & IFF_PROMISC);
3982 mxge_set_multicast_list(sc);
3984 } else {
3985 if (ifp->if_flags & IFF_RUNNING) {
3986 mxge_close(sc);
3989 break;
3991 case SIOCADDMULTI:
3992 case SIOCDELMULTI:
3993 mxge_set_multicast_list(sc);
3994 break;
3996 case SIOCSIFCAP:
3997 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3998 if (mask & IFCAP_TXCSUM) {
3999 if (IFCAP_TXCSUM & ifp->if_capenable) {
4000 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4001 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4002 | CSUM_TSO);
4003 } else {
4004 ifp->if_capenable |= IFCAP_TXCSUM;
4005 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4007 } else if (mask & IFCAP_RXCSUM) {
4008 if (IFCAP_RXCSUM & ifp->if_capenable) {
4009 ifp->if_capenable &= ~IFCAP_RXCSUM;
4010 sc->csum_flag = 0;
4011 } else {
4012 ifp->if_capenable |= IFCAP_RXCSUM;
4013 sc->csum_flag = 1;
4016 if (mask & IFCAP_TSO4) {
4017 if (IFCAP_TSO4 & ifp->if_capenable) {
4018 ifp->if_capenable &= ~IFCAP_TSO4;
4019 ifp->if_hwassist &= ~CSUM_TSO;
4020 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4021 ifp->if_capenable |= IFCAP_TSO4;
4022 ifp->if_hwassist |= CSUM_TSO;
4023 } else {
4024 kprintf("mxge requires tx checksum offload"
4025 " be enabled to use TSO\n");
4026 err = EINVAL;
4029 if (mask & IFCAP_LRO) {
4030 if (IFCAP_LRO & ifp->if_capenable)
4031 err = mxge_change_lro_locked(sc, 0);
4032 else
4033 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4035 if (mask & IFCAP_VLAN_HWTAGGING)
4036 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4037 VLAN_CAPABILITIES(ifp);
4039 break;
4041 case SIOCGIFMEDIA:
4042 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4043 &sc->media, command);
4044 break;
4046 default:
4047 err = ENOTTY;
4049 return err;
4052 static void
4053 mxge_fetch_tunables(mxge_softc_t *sc)
4056 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4057 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4058 &mxge_flow_control);
4059 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4060 &mxge_intr_coal_delay);
4061 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4062 &mxge_nvidia_ecrc_enable);
4063 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4064 &mxge_force_firmware);
4065 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4066 &mxge_deassert_wait);
4067 TUNABLE_INT_FETCH("hw.mxge.verbose",
4068 &mxge_verbose);
4069 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4070 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4071 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4072 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4073 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4074 if (sc->lro_cnt != 0)
4075 mxge_lro_cnt = sc->lro_cnt;
4077 if (bootverbose)
4078 mxge_verbose = 1;
4079 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4080 mxge_intr_coal_delay = 30;
4081 if (mxge_ticks == 0)
4082 mxge_ticks = hz / 2;
4083 sc->pause = mxge_flow_control;
4084 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4085 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4086 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4088 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4089 mxge_initial_mtu < ETHER_MIN_LEN)
4090 mxge_initial_mtu = ETHERMTU_JUMBO;
4094 static void
4095 mxge_free_slices(mxge_softc_t *sc)
4097 struct mxge_slice_state *ss;
4098 int i;
4101 if (sc->ss == NULL)
4102 return;
4104 for (i = 0; i < sc->num_slices; i++) {
4105 ss = &sc->ss[i];
4106 if (ss->fw_stats != NULL) {
4107 mxge_dma_free(&ss->fw_stats_dma);
4108 ss->fw_stats = NULL;
4109 #ifdef IFNET_BUF_RING
4110 if (ss->tx.br != NULL) {
4111 drbr_free(ss->tx.br, M_DEVBUF);
4112 ss->tx.br = NULL;
4114 #endif
4116 if (ss->rx_done.entry != NULL) {
4117 mxge_dma_free(&ss->rx_done.dma);
4118 ss->rx_done.entry = NULL;
4121 kfree(sc->ss, M_DEVBUF);
4122 sc->ss = NULL;
4125 static int
4126 mxge_alloc_slices(mxge_softc_t *sc)
4128 mxge_cmd_t cmd;
4129 struct mxge_slice_state *ss;
4130 size_t bytes;
4131 int err, i, max_intr_slots;
4133 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4134 if (err != 0) {
4135 device_printf(sc->dev, "Cannot determine rx ring size\n");
4136 return err;
4138 sc->rx_ring_size = cmd.data0;
4139 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4141 bytes = sizeof (*sc->ss) * sc->num_slices;
4142 sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4143 if (sc->ss == NULL)
4144 return (ENOMEM);
4145 for (i = 0; i < sc->num_slices; i++) {
4146 ss = &sc->ss[i];
4148 ss->sc = sc;
4150 /* allocate per-slice rx interrupt queues */
4152 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4153 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4154 if (err != 0)
4155 goto abort;
4156 ss->rx_done.entry = ss->rx_done.dma.addr;
4157 bzero(ss->rx_done.entry, bytes);
4160 * allocate the per-slice firmware stats; stats
4161 * (including tx) are used only on the first
4162 * slice for now
4164 #ifndef IFNET_BUF_RING
4165 if (i > 0)
4166 continue;
4167 #endif
4169 bytes = sizeof (*ss->fw_stats);
4170 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4171 sizeof (*ss->fw_stats), 64);
4172 if (err != 0)
4173 goto abort;
4174 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4175 #ifdef IFNET_BUF_RING
4176 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4177 &ss->tx.lock);
4178 #endif
4181 return (0);
4183 abort:
4184 mxge_free_slices(sc);
4185 return (ENOMEM);
4188 static void
4189 mxge_slice_probe(mxge_softc_t *sc)
4191 mxge_cmd_t cmd;
4192 char *old_fw;
4193 int msix_cnt, status, max_intr_slots;
4195 sc->num_slices = 1;
4197 * don't enable multiple slices if they have been disabled via
4198 * the hw.mxge.max_slices tunable, or if this is not an SMP system
4201 if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2)
4202 return;
4204 /* see how many MSI-X interrupts are available */
4205 msix_cnt = pci_msix_count(sc->dev);
4206 if (msix_cnt < 2)
4207 return;
4209 /* now load the slice-aware firmware and see what it supports */
4210 old_fw = sc->fw_name;
4211 if (old_fw == mxge_fw_aligned)
4212 sc->fw_name = mxge_fw_rss_aligned;
4213 else
4214 sc->fw_name = mxge_fw_rss_unaligned;
4215 status = mxge_load_firmware(sc, 0);
4216 if (status != 0) {
4217 device_printf(sc->dev, "Falling back to a single slice\n");
4218 return;
4219 }
4221 /* try to send a reset command to the card to see if it
4222 is alive */
4223 memset(&cmd, 0, sizeof (cmd));
4224 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4225 if (status != 0) {
4226 device_printf(sc->dev, "failed reset\n");
4227 goto abort_with_fw;
4228 }
4230 /* get rx ring size */
4231 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4232 if (status != 0) {
4233 device_printf(sc->dev, "Cannot determine rx ring size\n");
4234 goto abort_with_fw;
4235 }
4236 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4238 /* tell it the size of the interrupt queues */
4239 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4240 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4241 if (status != 0) {
4242 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4243 goto abort_with_fw;
4244 }
4246 /* ask for the maximum number of slices it supports */
4247 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4248 if (status != 0) {
4249 device_printf(sc->dev,
4250 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4251 goto abort_with_fw;
4252 }
4253 sc->num_slices = cmd.data0;
4254 if (sc->num_slices > msix_cnt)
4255 sc->num_slices = msix_cnt;
4257 if (mxge_max_slices == -1) {
4258 /* cap to number of CPUs in system */
4259 if (sc->num_slices > ncpus)
4260 sc->num_slices = ncpus;
4261 } else {
4262 if (sc->num_slices > mxge_max_slices)
4263 sc->num_slices = mxge_max_slices;
4264 }
4265 /* make sure it is a power of two */
4266 while (sc->num_slices & (sc->num_slices - 1))
4267 sc->num_slices--;
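4268 /* decrementing until (n & (n - 1)) == 0 yields the largest power of two <= the clamped value */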
4269 if (mxge_verbose)
4270 device_printf(sc->dev, "using %d slices\n",
4271 sc->num_slices);
4273 return;
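4274 /* probe failed: restore and reload the firmware that was selected before the probe */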
4275 abort_with_fw:
4276 sc->fw_name = old_fw;
4277 (void) mxge_load_firmware(sc, 0);
4278 }
4280 static int
4281 mxge_add_msix_irqs(mxge_softc_t *sc)
4282 {
4283 size_t bytes;
4284 int count, err, i, rid;
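4285 /* the MSI-X table lives in BAR(2) on this NIC; keep the BAR mapped while MSI-X is active */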
4286 rid = PCIR_BAR(2);
4287 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
4288 &rid, RF_ACTIVE);
4290 if (sc->msix_table_res == NULL) {
4291 device_printf(sc->dev, "couldn't alloc MSIX table res\n");
4292 return ENXIO;
4293 }
4295 count = sc->num_slices;
4296 err = pci_alloc_msix(sc->dev, &count);
4297 if (err != 0) {
4298 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
4299 "err = %d\n", sc->num_slices, err);
4300 goto abort_with_msix_table;
4301 }
4302 if (count < sc->num_slices) {
4303 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
4304 sc->num_slices, count);
4305 device_printf(sc->dev,
4306 "Try setting hw.mxge.max_slices to %d\n",
4307 count);
4308 err = ENOSPC;
4309 goto abort_with_msix;
4310 }
4311 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
4312 sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
4313 if (sc->msix_irq_res == NULL) {
4314 err = ENOMEM;
4315 goto abort_with_msix;
4316 }
4318 for (i = 0; i < sc->num_slices; i++) {
4319 rid = i + 1;
4320 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
4321 SYS_RES_IRQ,
4322 &rid, RF_ACTIVE);
4323 if (sc->msix_irq_res[i] == NULL) {
4324 device_printf(sc->dev, "couldn't allocate IRQ res"
4325 " for message %d\n", i);
4326 err = ENXIO;
4327 goto abort_with_res;
4328 }
4329 }
4331 bytes = sizeof (*sc->msix_ih) * sc->num_slices;
4332 sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
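4333 /* XXX the M_NOWAIT allocation above is not checked for NULL */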
4334 for (i = 0; i < sc->num_slices; i++) {
4335 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
4336 INTR_MPSAFE,
4337 mxge_intr, &sc->ss[i], &sc->msix_ih[i],
4338 sc->ifp->if_serializer);
4339 if (err != 0) {
4340 device_printf(sc->dev, "couldn't setup intr for "
4341 "message %d\n", i);
4342 goto abort_with_intr;
4343 }
4344 }
4346 if (mxge_verbose) {
4347 device_printf(sc->dev, "using %d msix IRQs:",
4348 sc->num_slices);
4349 for (i = 0; i < sc->num_slices; i++)
4350 kprintf(" %ld", rman_get_start(sc->msix_irq_res[i]));
4351 kprintf("\n");
4352 }
4353 return (0);
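4354 /* error unwind: each label below falls through into the next, releasing resources in reverse order of acquisition */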
4355 abort_with_intr:
4356 for (i = 0; i < sc->num_slices; i++) {
4357 if (sc->msix_ih[i] != NULL) {
4358 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4359 sc->msix_ih[i]);
4360 sc->msix_ih[i] = NULL;
4361 }
4362 }
4363 kfree(sc->msix_ih, M_DEVBUF);
4366 abort_with_res:
4367 for (i = 0; i < sc->num_slices; i++) {
4368 rid = i + 1;
4369 if (sc->msix_irq_res[i] != NULL)
4370 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4371 sc->msix_irq_res[i]);
4372 sc->msix_irq_res[i] = NULL;
4373 }
4374 kfree(sc->msix_irq_res, M_DEVBUF);
4377 abort_with_msix:
4378 pci_release_msi(sc->dev);
4380 abort_with_msix_table:
4381 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4382 sc->msix_table_res);
4384 return err;
4385 }
4387 static int
4388 mxge_add_single_irq(mxge_softc_t *sc)
4389 {
4390 int count, err, rid;
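4391 /* rid 1 selects the MSI vector, rid 0 the legacy INTx line */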
4392 count = pci_msi_count(sc->dev);
4393 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
4394 rid = 1;
4395 } else {
4396 rid = 0;
4397 sc->legacy_irq = 1;
4398 }
4399 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
4400 1, RF_SHAREABLE | RF_ACTIVE);
4401 if (sc->irq_res == NULL) {
4402 device_printf(sc->dev, "could not alloc interrupt\n");
4403 return ENXIO;
4404 }
4405 if (mxge_verbose)
4406 device_printf(sc->dev, "using %s irq %ld\n",
4407 sc->legacy_irq ? "INTx" : "MSI",
4408 rman_get_start(sc->irq_res));
4409 err = bus_setup_intr(sc->dev, sc->irq_res,
4410 INTR_MPSAFE,
4411 mxge_intr, &sc->ss[0], &sc->ih,
4412 sc->ifp->if_serializer);
4413 if (err != 0) {
4414 bus_release_resource(sc->dev, SYS_RES_IRQ,
4415 sc->legacy_irq ? 0 : 1, sc->irq_res);
4416 if (!sc->legacy_irq)
4417 pci_release_msi(sc->dev);
4418 }
4419 return err;
4420 }
4422 static void
4423 mxge_rem_msix_irqs(mxge_softc_t *sc)
4424 {
4425 int i, rid;
4427 for (i = 0; i < sc->num_slices; i++) {
4428 if (sc->msix_ih[i] != NULL) {
4429 bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
4430 sc->msix_ih[i]);
4431 sc->msix_ih[i] = NULL;
4432 }
4433 }
4434 kfree(sc->msix_ih, M_DEVBUF);
4436 for (i = 0; i < sc->num_slices; i++) {
4437 rid = i + 1;
4438 if (sc->msix_irq_res[i] != NULL)
4439 bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
4440 sc->msix_irq_res[i]);
4441 sc->msix_irq_res[i] = NULL;
4442 }
4443 kfree(sc->msix_irq_res, M_DEVBUF);
4445 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
4446 sc->msix_table_res);
4448 pci_release_msi(sc->dev);
4449 return;
4450 }
4452 static void
4453 mxge_rem_single_irq(mxge_softc_t *sc)
4454 {
4455 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
4456 bus_release_resource(sc->dev, SYS_RES_IRQ,
4457 sc->legacy_irq ? 0 : 1, sc->irq_res);
4458 if (!sc->legacy_irq)
4459 pci_release_msi(sc->dev);
4460 }
4462 static void
4463 mxge_rem_irq(mxge_softc_t *sc)
4464 {
4465 if (sc->num_slices > 1)
4466 mxge_rem_msix_irqs(sc);
4467 else
4468 mxge_rem_single_irq(sc);
4469 }
4471 static int
4472 mxge_add_irq(mxge_softc_t *sc)
4473 {
4474 int err;
4476 if (sc->num_slices > 1)
4477 err = mxge_add_msix_irqs(sc);
4478 else
4479 err = mxge_add_single_irq(sc);
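4480 /* the "if (0 && ...)" block below is deliberately dead code, apparently a leftover MSI-X re-setup experiment */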
4481 if (0 && err == 0 && sc->num_slices > 1) {
4482 mxge_rem_msix_irqs(sc);
4483 err = mxge_add_msix_irqs(sc);
4484 }
4485 return err;
4486 }
4489 static int
4490 mxge_attach(device_t dev)
4491 {
4492 mxge_softc_t *sc = device_get_softc(dev);
4493 struct ifnet *ifp = &sc->arpcom.ac_if;
4494 int err, rid;
4496 /*
4497 * avoid rewriting half the lines in this file to use
4498 * &sc->arpcom.ac_if instead
4499 */
4500 sc->ifp = ifp;
4501 sc->dev = dev;
4502 mxge_fetch_tunables(sc);
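4503 /* parent DMA tag bounding all per-ring tags; maxsize 64KB + 256 presumably covers a maximal TSO send */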
4504 err = bus_dma_tag_create(NULL, /* parent */
4505 1, /* alignment */
4506 0, /* boundary */
4507 BUS_SPACE_MAXADDR, /* low */
4508 BUS_SPACE_MAXADDR, /* high */
4509 NULL, NULL, /* filter */
4510 65536 + 256, /* maxsize */
4511 MXGE_MAX_SEND_DESC, /* num segs */
4512 65536, /* maxsegsize */
4513 0, /* flags */
4514 &sc->parent_dmat); /* tag */
4516 if (err != 0) {
4517 device_printf(sc->dev, "Err %d allocating parent dmat\n",
4518 err);
4519 goto abort_with_nothing;
4520 }
4522 sc->ifp = ifp;
4523 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
4525 callout_init_mp(&sc->co_hdl);
4527 mxge_setup_cfg_space(sc);
4529 /* Map the board into the kernel */
4530 rid = PCIR_BARS;
4531 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
4532 ~0, 1, RF_ACTIVE);
4533 if (sc->mem_res == NULL) {
4534 device_printf(dev, "could not map memory\n");
4535 err = ENXIO;
4536 goto abort_with_nothing;
4537 }
4538 sc->sram = rman_get_virtual(sc->mem_res);
4539 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4540 if (sc->sram_size > rman_get_size(sc->mem_res)) {
4541 device_printf(dev, "impossible memory region size %ld\n",
4542 rman_get_size(sc->mem_res));
4543 err = ENXIO;
4544 goto abort_with_mem_res;
4545 }
4547 /* make NULL terminated copy of the EEPROM strings section of
4548 lanai SRAM */
4549 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
4550 bus_space_read_region_1(rman_get_bustag(sc->mem_res),
4551 rman_get_bushandle(sc->mem_res),
4552 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
4553 sc->eeprom_strings,
4554 MXGE_EEPROM_STRINGS_SIZE - 2);
4555 err = mxge_parse_strings(sc);
4556 if (err != 0)
4557 goto abort_with_mem_res;
4559 /* Enable write combining for efficient use of PCIe bus */
4560 mxge_enable_wc(sc);
4562 /* Allocate the out of band dma memory */
4563 err = mxge_dma_alloc(sc, &sc->cmd_dma,
4564 sizeof (mxge_cmd_t), 64);
4565 if (err != 0)
4566 goto abort_with_mem_res;
4567 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
4568 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
4569 if (err != 0)
4570 goto abort_with_cmd_dma;
4572 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
4573 if (err != 0)
4574 goto abort_with_zeropad_dma;
4576 /* select & load the firmware */
4577 err = mxge_select_firmware(sc);
4578 if (err != 0)
4579 goto abort_with_dmabench;
4580 sc->intr_coal_delay = mxge_intr_coal_delay;
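4581 /* the slice probe may switch to the RSS firmware, so it must run after firmware selection and before slices are allocated */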
4582 mxge_slice_probe(sc);
4583 err = mxge_alloc_slices(sc);
4584 if (err != 0)
4585 goto abort_with_dmabench;
4587 err = mxge_reset(sc, 0);
4588 if (err != 0)
4589 goto abort_with_slices;
4591 err = mxge_alloc_rings(sc);
4592 if (err != 0) {
4593 device_printf(sc->dev, "failed to allocate rings\n");
4594 goto abort_with_slices;
4595 }
4597 ifp->if_baudrate = IF_Gbps(10UL);
4598 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
4599 IFCAP_VLAN_MTU;
4600 #ifdef INET
4601 ifp->if_capabilities |= IFCAP_LRO;
4602 #endif
4604 #ifdef MXGE_NEW_VLAN_API
4605 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
4606 #endif
4608 sc->max_mtu = mxge_max_mtu(sc);
4609 if (sc->max_mtu >= 9000)
4610 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
4611 else
4612 device_printf(dev, "MTU limited to %d. Install "
4613 "latest firmware for 9000 byte jumbo support\n",
4614 sc->max_mtu - ETHER_HDR_LEN);
4615 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
4616 ifp->if_capenable = ifp->if_capabilities;
4617 if (sc->lro_cnt == 0)
4618 ifp->if_capenable &= ~IFCAP_LRO;
4619 sc->csum_flag = 1;
4620 ifp->if_init = mxge_init;
4621 ifp->if_softc = sc;
4622 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
4623 ifp->if_ioctl = mxge_ioctl;
4624 ifp->if_start = mxge_start;
4625 /* Initialise the ifmedia structure */
4626 ifmedia_init(&sc->media, 0, mxge_media_change,
4627 mxge_media_status);
4628 mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
4629 mxge_media_probe(sc);
4630 sc->dying = 0;
4631 ether_ifattach(ifp, sc->mac_addr, NULL);
4632 /* ether_ifattach sets mtu to ETHERMTU */
4633 if (mxge_initial_mtu != ETHERMTU) {
4634 lwkt_serialize_enter(ifp->if_serializer);
4635 mxge_change_mtu(sc, mxge_initial_mtu);
4636 lwkt_serialize_exit(ifp->if_serializer);
4637 }
4638 /* must come after ether_ifattach() */
4639 err = mxge_add_irq(sc);
4640 if (err != 0) {
4641 device_printf(sc->dev, "failed to add irq\n");
4642 goto abort_with_rings;
4643 }
4645 mxge_add_sysctls(sc);
4646 #ifdef IFNET_BUF_RING
4647 ifp->if_transmit = mxge_transmit;
4648 ifp->if_qflush = mxge_qflush;
4649 #endif
4650 return 0;
4652 abort_with_rings:
4653 mxge_free_rings(sc);
4654 abort_with_slices:
4655 mxge_free_slices(sc);
4656 abort_with_dmabench:
4657 mxge_dma_free(&sc->dmabench_dma);
4658 abort_with_zeropad_dma:
4659 mxge_dma_free(&sc->zeropad_dma);
4660 abort_with_cmd_dma:
4661 mxge_dma_free(&sc->cmd_dma);
4662 abort_with_mem_res:
4663 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4664 pci_disable_busmaster(dev);
4665 bus_dma_tag_destroy(sc->parent_dmat);
4666 abort_with_nothing:
4667 return err;
4668 }
4670 static int
4671 mxge_detach(device_t dev)
4672 {
4673 mxge_softc_t *sc = device_get_softc(dev);
4675 lwkt_serialize_enter(sc->ifp->if_serializer);
4676 sc->dying = 1;
4677 if (sc->ifp->if_flags & IFF_RUNNING)
4678 mxge_close(sc);
4679 /*
4680 * XXX: race: the callout callback could be spinning on
4681 * the serializer and run anyway
4682 */
4683 callout_stop(&sc->co_hdl);
4684 lwkt_serialize_exit(sc->ifp->if_serializer);
4686 ether_ifdetach(sc->ifp);
4687 ifmedia_removeall(&sc->media);
4688 mxge_dummy_rdma(sc, 0);
4689 mxge_rem_sysctls(sc);
4690 mxge_rem_irq(sc);
4691 mxge_free_rings(sc);
4692 mxge_free_slices(sc);
4693 mxge_dma_free(&sc->dmabench_dma);
4694 mxge_dma_free(&sc->zeropad_dma);
4695 mxge_dma_free(&sc->cmd_dma);
4696 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
4697 pci_disable_busmaster(dev);
4698 bus_dma_tag_destroy(sc->parent_dmat);
4699 return 0;
4700 }
4702 static int
4703 mxge_shutdown(device_t dev)
4704 {
4705 return 0;
4706 }
4708 /*
4709 This file uses Myri10GE driver indentation.
4711 Local Variables:
4712 c-file-style:"linux"
4713 tab-width:8
4714 End:
4715 */