mxge: call bus_setup_intr() after the if serializer has been set
[dragonfly.git] / sys / dev / netif / mxge / if_mxge.c
/******************************************************************************

Copyright (c) 2006-2009, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/
#include <sys/cdefs.h>
/*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/endian.h>
#include <sys/in_cksum.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/bpf.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/zlib.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <sys/bus.h>
#include <sys/rman.h>

#include <bus/pci/pcireg.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */

#include <vm/vm.h> /* for pmap_mapdev() */
#include <vm/pmap.h>

#if defined(__i386) || defined(__amd64)
#include <machine/specialreg.h>
#endif

#include <dev/netif/mxge/mxge_mcp.h>
#include <dev/netif/mxge/mcp_gen_header.h>
/*#define MXGE_FAKE_IFP*/
#include <dev/netif/mxge/if_mxge_var.h>
#ifdef IFNET_BUF_RING
#include <sys/buf_ring.h>
#endif

#include "opt_inet.h"
/* tunable params */
static int mxge_nvidia_ecrc_enable = 1;
static int mxge_force_firmware = 0;
static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
static int mxge_always_promisc = 0;
/* XXX: not yet */
/* static int mxge_initial_mtu = ETHERMTU_JUMBO; */
static int mxge_initial_mtu = ETHERMTU;
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);
/* XXX: we don't have Large Receive Offload support yet */
inline int
mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
{
	(void)ss;
	(void)m_head;
	(void)csum;
	return 1;
}

inline void
mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
{
	(void)ss;
	(void)lro;
}
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if 0
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
#else
	sc->wc = 0;	/* TBD: PAT support */
#endif
}
/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}
/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}
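
/*
 * Illustrative example of the parse above (hypothetical values, not
 * from any real board): an EEPROM block of
 *	"MAC=00:60:dd:47:ab:cd\0SN=12345\0PC=M3F-ETH\0\0"
 * yields sc->mac_addr = {0x00, 0x60, 0xdd, 0x47, 0xab, 0xcd},
 * sc->serial_number_string = "12345" and sc->product_code_string =
 * "M3F-ETH".  The MAC loop advances ptr by 3 each iteration, i.e.
 * one "xx:" group at a time, and strtoul() stops at each ':'.
 * MXGE_NEXT_STRING() then skips past the NUL to the next string.
 */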
#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}
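
/*
 * A worked example of the bandwidth arithmetic above (hypothetical
 * values): with len = 4096 and cmd.data0 = (1000 << 16) | 2000, the
 * firmware completed 1000 transfers of 4096 bytes in 2000 ticks of
 * 0.5us, i.e. 4096000 bytes in 1ms.  (transfers * len * 2) / ticks =
 * (1000 * 4096 * 2) / 2000 = 4096; since bytes/us is roughly MB/s,
 * the extra factor of 2 converts the 0.5us tick unit, giving
 * sc->read_dma = 4096 MB/s.
 */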
/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x)\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}
	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;

	if (mxge_force_firmware != 0) {
		if (mxge_force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	ksscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
		&sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
			      MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR);
		return EINVAL;
	}
	return 0;
}
#if 0
static void *
z_alloc(void *nil, u_int items, u_int size)
{
	void *ptr;

	ptr = kmalloc(items * size, M_TEMP, M_NOWAIT);
	return ptr;
}

static void
z_free(void *nil, void *ptr)
{
	kfree(ptr, M_TEMP);
}
#endif

static int
mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit)
{
	struct fw_image *fw;
	const mcp_gen_header_t *hdr;
	unsigned hdr_offset;
	int status;
	unsigned int i;
	char dummy;
	size_t fw_len;

	fw = firmware_image_load(sc->fw_name, NULL);
	if (fw == NULL) {
		device_printf(sc->dev, "Could not find firmware image %s\n",
			      sc->fw_name);
		return ENOENT;
	}
#if 0
	/* setup zlib and decompress f/w */
	bzero(&zs, sizeof (zs));
	zs.zalloc = z_alloc;
	zs.zfree = z_free;
	status = inflateInit(&zs);
	if (status != Z_OK) {
		status = EIO;
		goto abort_with_fw;
	}

	/* the uncompressed size is stored as the firmware version,
	   which would otherwise go unused */
	fw_len = (size_t) fw->version;
	inflate_buffer = kmalloc(fw_len, M_TEMP, M_NOWAIT);
	if (inflate_buffer == NULL)
		goto abort_with_zs;
	zs.avail_in = fw->datasize;
	zs.next_in = __DECONST(char *, fw->data);
	zs.avail_out = fw_len;
	zs.next_out = inflate_buffer;
	status = inflate(&zs, Z_FINISH);
	if (status != Z_STREAM_END) {
		device_printf(sc->dev, "zlib %d\n", status);
		status = EIO;
		goto abort_with_buffer;
	}
#endif
	fw_len = fw->fw_imglen;
	/* check id */
	hdr_offset = htobe32(*(const uint32_t *)
			     (fw->fw_image + MCP_HEADER_PTR_OFFSET));
	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) {
		device_printf(sc->dev, "Bad firmware file");
		status = EIO;
		goto abort_with_fw;
	}
	hdr = (const void*)(fw->fw_image + hdr_offset);

	status = mxge_validate_firmware(sc, hdr);
	if (status != 0)
		goto abort_with_fw;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      fw->fw_image + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
#if 0
abort_with_buffer:
	kfree(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
#endif
abort_with_fw:
	firmware_image_unload(fw);
	return status;
}
/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high);		/* dummy addr MSW */
	buf[4] = htobe32(dma_low);		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/*
	 * We may be called during attach, before if_serializer is available.
	 * This is not a fast path, just check for NULL
	 */
	if (sc->ifp->if_serializer)
		ASSERT_SERIALIZED(sc->ifp->if_serializer);

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);

	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out, "
			      "result = %d\n",
			      cmd, be32toh(response->result));
	return err;
}
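
/*
 * Typical caller pattern, as used throughout this file: fill in
 * cmd.data0..data2, issue the command, then read any result back out
 * of cmd.data0, e.g.
 *
 *	mxge_cmd_t cmd;
 *	memset(&cmd, 0, sizeof (cmd));
 *	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
 *
 * The 20 iterations of DELAY(1000) above bound the busy-wait at
 * roughly 20ms before the command is declared timed out.
 */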
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = kmalloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not kmalloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	kfree(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size, dma_low, dma_high;
	int status, i;

	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				      "Using firmware currently running on NIC"
				      ".  For optimal\n");
			device_printf(sc->dev,
				      "performance consider loading optimized "
				      "firmware\n");
		}
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
					/* where the code starts */
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);
		return ENXIO;
	}
	return 0;
}
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}
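
/*
 * Packing example (hypothetical address 00:60:dd:47:ab:cd): the six
 * MAC bytes land big-endian style across the two 32-bit command
 * words, cmd.data0 = 0x0060dd47 and cmd.data1 = 0x0000abcd.
 */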
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (sc->ifp->if_serializer)
		ASSERT_SERIALIZED(sc->ifp->if_serializer);
	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	if (ifp->if_serializer)
		ASSERT_SERIALIZED(ifp->if_serializer);

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      " %d\n", err);
			/* abort, leaving multicast filtering off */
			return;
		}
	}
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */
	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			ss->fw_stats->valid = 0;
			ss->fw_stats->send_done_count = 0;
		}
	}
	sc->rdma_tags_available = 15;
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	return status;
}
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
	int err;

	sc = arg1;
	intr_coal_delay = sc->intr_coal_delay;
	err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req);
	if (err != 0) {
		return err;
	}
	if (intr_coal_delay == sc->intr_coal_delay)
		return 0;

	if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	*sc->intr_coal_delay_ptr = htobe32(intr_coal_delay);
	sc->intr_coal_delay = intr_coal_delay;

	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int enabled;
	int err;

	sc = arg1;
	enabled = sc->pause;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
	if (err != 0) {
		return err;
	}
	if (enabled == sc->pause)
		return 0;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_pause(sc, enabled);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp;
	int err = 0;

	ifp = sc->ifp;
	if (lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	else
		ifp->if_capenable |= IFCAP_LRO;
	sc->lro_cnt = lro_cnt;
	if (ifp->if_flags & IFF_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
	}
	return err;
}

static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int lro_cnt;
	int err;

	sc = arg1;
	lro_cnt = sc->lro_cnt;
	err = sysctl_handle_int(oidp, &lro_cnt, arg2, req);
	if (err != 0)
		return err;

	if (lro_cnt == sc->lro_cnt)
		return 0;

	if (lro_cnt > 128)
		return EINVAL;

	lwkt_serialize_enter(sc->ifp->if_serializer);
	err = mxge_change_lro_locked(sc, lro_cnt);
	lwkt_serialize_exit(sc->ifp->if_serializer);
	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
	int err;

	if (arg1 == NULL)
		return EFAULT;
	arg2 = be32toh(*(int *)arg1);
	arg1 = NULL;
	err = sysctl_handle_int(oidp, arg1, arg2, req);

	return err;
}
static void
mxge_rem_sysctls(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int slice;

	if (sc->slice_sysctl_tree == NULL)
		return;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		if (ss == NULL || ss->sysctl_tree == NULL)
			continue;
		sysctl_ctx_free(&ss->sysctl_ctx);
		ss->sysctl_tree = NULL;
	}
	sysctl_ctx_free(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree = NULL;
	sysctl_ctx_free(&sc->sysctl_ctx);
	sc->sysctl_tree = NULL;
}
static void
mxge_add_sysctls(mxge_softc_t *sc)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid_list *children;
	mcp_irq_data_t *fw;
	struct mxge_slice_state *ss;
	int slice;
	char slice_num[8];

	ctx = &sc->sysctl_ctx;
	sysctl_ctx_init(ctx);
	sc->sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
					  OID_AUTO,
					  device_get_nameunit(sc->dev),
					  CTLFLAG_RD, 0, "");
	if (sc->sysctl_tree == NULL) {
		device_printf(sc->dev, "can't add sysctl node\n");
		return;
	}

	children = SYSCTL_CHILDREN(sc->sysctl_tree);
	fw = sc->ss[0].fw_stats;

	/* random information */
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "firmware_version",
			  CTLFLAG_RD, &sc->fw_version,
			  0, "firmware version");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "serial_number",
			  CTLFLAG_RD, &sc->serial_number_string,
			  0, "serial number");
	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
			  "product_code",
			  CTLFLAG_RD, &sc->product_code_string,
			  0, "product_code");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "pcie_link_width",
		       CTLFLAG_RD, &sc->link_width,
		       0, "PCIe link width");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "tx_boundary",
		       CTLFLAG_RD, &sc->tx_boundary,
		       0, "tx_boundary");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_combine",
		       CTLFLAG_RD, &sc->wc,
		       0, "write combining PIO?");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_dma_MBs",
		       CTLFLAG_RD, &sc->read_dma,
		       0, "DMA Read speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "write_dma_MBs",
		       CTLFLAG_RD, &sc->write_dma,
		       0, "DMA Write speed in MB/s");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "read_write_dma_MBs",
		       CTLFLAG_RD, &sc->read_write_dma,
		       0, "DMA concurrent Read/Write speed in MB/s");

	/* performance related tunables */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"intr_coal_delay",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_intr_coal,
			"I", "interrupt coalescing delay in usecs");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"flow_control_enabled",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_flow_control,
			"I", "enable/disable flow control");

	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "deassert_wait",
		       CTLFLAG_RW, &mxge_deassert_wait,
		       0, "Wait for IRQ line to go low in ihandler");

	/* stats block from firmware is in network byte order.
	   Need to swap it */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"link_up",
			CTLTYPE_INT|CTLFLAG_RD, &fw->link_up,
			0, mxge_handle_be32,
			"I", "link up");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"rdma_tags_available",
			CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available,
			0, mxge_handle_be32,
			"I", "rdma_tags_available");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_crc32",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_crc32,
			0, mxge_handle_be32,
			"I", "dropped_bad_crc32");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_bad_phy",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_bad_phy,
			0, mxge_handle_be32,
			"I", "dropped_bad_phy");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_error_or_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_link_error_or_filtered,
			0, mxge_handle_be32,
			"I", "dropped_link_error_or_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_link_overflow",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow,
			0, mxge_handle_be32,
			"I", "dropped_link_overflow");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_multicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_multicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_multicast_filtered");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_big_buffer",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_big_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_no_small_buffer",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_no_small_buffer,
			0, mxge_handle_be32,
			"I", "dropped_no_small_buffer");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_overrun",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun,
			0, mxge_handle_be32,
			"I", "dropped_overrun");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_pause",
			CTLTYPE_INT|CTLFLAG_RD,
			&fw->dropped_pause,
			0, mxge_handle_be32,
			"I", "dropped_pause");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_runt",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt,
			0, mxge_handle_be32,
			"I", "dropped_runt");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"dropped_unicast_filtered",
			CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered,
			0, mxge_handle_be32,
			"I", "dropped_unicast_filtered");

	/* verbose printing? */
	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
		       "verbose",
		       CTLFLAG_RW, &mxge_verbose,
		       0, "verbose printing");

	/* lro */
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
			"lro_cnt",
			CTLTYPE_INT|CTLFLAG_RW, sc,
			0, mxge_change_lro,
			"I", "number of lro merge queues");

	/* add counters exported for debugging from all slices */
	sysctl_ctx_init(&sc->slice_sysctl_ctx);
	sc->slice_sysctl_tree =
		SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO,
				"slice", CTLFLAG_RD, 0, "");

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		sysctl_ctx_init(&ss->sysctl_ctx);
		ctx = &ss->sysctl_ctx;
		children = SYSCTL_CHILDREN(sc->slice_sysctl_tree);
		ksprintf(slice_num, "%d", slice);
		ss->sysctl_tree =
			SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num,
					CTLFLAG_RD, 0, "");
		children = SYSCTL_CHILDREN(ss->sysctl_tree);
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_small_cnt",
			       CTLFLAG_RD, &ss->rx_small.cnt,
			       0, "rx_small_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "rx_big_cnt",
			       CTLFLAG_RD, &ss->rx_big.cnt,
			       0, "rx_big_cnt");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
			       0, "number of lro merge queues flushed");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "lro_queued", CTLFLAG_RD, &ss->lro_queued,
			       0, "number of frames appended to lro merge "
			       "queues");

#ifndef IFNET_BUF_RING
		/* only transmit from slice 0 for now */
		if (slice > 0)
			continue;
#endif
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_req",
			       CTLFLAG_RD, &ss->tx.req,
			       0, "tx_req");

		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_done",
			       CTLFLAG_RD, &ss->tx.done,
			       0, "tx_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_pkt_done",
			       CTLFLAG_RD, &ss->tx.pkt_done,
			       0, "tx_pkt_done");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_stall",
			       CTLFLAG_RD, &ss->tx.stall,
			       0, "tx_stall");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_wake",
			       CTLFLAG_RD, &ss->tx.wake,
			       0, "tx_wake");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_defrag",
			       CTLFLAG_RD, &ss->tx.defrag,
			       0, "tx_defrag");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_queue_active",
			       CTLFLAG_RD, &ss->tx.queue_active,
			       0, "tx_queue_active");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_activate",
			       CTLFLAG_RD, &ss->tx.activate,
			       0, "tx_activate");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO,
			       "tx_deactivate",
			       CTLFLAG_RD, &ss->tx.deactivate,
			       0, "tx_deactivate");
	}
}
/* copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();
	}
}
/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	wmb();
}
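
/*
 * Illustrative timeline of the valid-flag handshake above (slot
 * indices are hypothetical): for cnt = 3 requests landing in slots
 * 10..12, the copy loop writes all three slots with slot 10's flags
 * byte forced to 0, so the NIC ignores the partially-written chain.
 * The final 4-byte store of the request's last 32-bit word (which
 * contains the flags byte) restores slot 10's real flags, publishing
 * the whole chain to the firmware in a single PIO write.
 */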
#if IFCAP_TSO4

static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, int ip_off)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
		m_copydata(m, 0, ip_off + sizeof (*ip),
			   ss->scratch);
		ip = (struct ip *)(ss->scratch + ip_off);
	} else {
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}
	if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, ip_off + (ip->ip_hl << 2)
			   + sizeof (*tcp), ss->scratch);
		ip = (struct ip *)(mtod(m, char *) + ip_off);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = ip_off + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one or more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	(req-rdma_count)->rdma_count = rdma_count;

	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		kprintf("tx->max_desc exceeded via TSO!\n");
		kprintf("mss = %d, %ld, %d!\n", mss,
			(long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;
}

#endif /* IFCAP_TSO4 */
#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, EVL_ENCAPLEN, MB_DONTWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + EVL_ENCAPLEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */
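
/*
 * Sketch of the 802.1Q transformation above:
 *
 *	before:  [dst 6][src 6][ether_type 2][payload]
 *	after:   [dst 6][src 6][0x8100 2][tag 2][ether_type 2][payload]
 *
 * M_PREPEND makes room for EVL_ENCAPLEN (4) bytes at the front, the
 * bcopy slides dst/src forward over the new space, and the TPID plus
 * tag are written into the 4-byte hole that opens up before the
 * original ether_type.
 */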
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag, ip_off;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	ip_off = sizeof (struct ether_header);
#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop;
		ip_off += EVL_ENCAPLEN;
	}
#endif
	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_segment(tx->dmat, tx->info[idx].map,
					   m, tx->seg_list, 1, &cnt,
					   BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_segment(tx->dmat,
						   tx->info[idx].map,
						   m, tx->seg_list, 1, &cnt,
						   BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_segment returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, ip_off);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < ip_off + sizeof (*ip))) {
			m_copydata(m, 0, ip_off + sizeof (*ip),
				   ss->scratch);
			ip = (struct ip *)(ss->scratch + ip_off);
		} else {
			ip = (struct ip *)(mtod(m, char *) + ip_off);
		}
		cksum_offset = ip_off + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		kprintf("%d: addr: 0x%x 0x%x len:%d pso%d,"
			"cso:%d, flags:0x%x, rdma:%d\n",
			i, (int)ntohl(tx->req_list[i].addr_high),
			(int)ntohl(tx->req_list[i].addr_low),
			(int)ntohs(tx->req_list[i].length),
			(int)ntohs(tx->req_list[i].pseudo_hdr_offset),
			tx->req_list[i].cksum_offset, tx->req_list[i].flags,
			tx->req_list[i].rdma_count);
	}
	kprintf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	m_freem(m);
	ss->oerrors++;
	return;
}
2173 #ifdef IFNET_BUF_RING
2174 static void
2175 mxge_qflush(struct ifnet *ifp)
2177 mxge_softc_t *sc = ifp->if_softc;
2178 mxge_tx_ring_t *tx;
2179 struct mbuf *m;
2180 int slice;
2182 for (slice = 0; slice < sc->num_slices; slice++) {
2183 tx = &sc->ss[slice].tx;
2184 lwkt_serialize_enter(sc->ifp->if_serializer);
2185 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
2186 m_freem(m);
2187 lwkt_serialize_exit(sc->ifp->if_serializer);
2189 if_qflush(ifp);
2192 static inline void
2193 mxge_start_locked(struct mxge_slice_state *ss)
2195 mxge_softc_t *sc;
2196 struct mbuf *m;
2197 struct ifnet *ifp;
2198 mxge_tx_ring_t *tx;
2200 sc = ss->sc;
2201 ifp = sc->ifp;
2202 tx = &ss->tx;
2204 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2205 m = drbr_dequeue(ifp, tx->br);
2206 if (m == NULL) {
2207 return;
2209 /* let BPF see it */
2210 BPF_MTAP(ifp, m);
2212 /* give it to the nic */
2213 mxge_encap(ss, m);
2215 /* ran out of transmit slots */
2216 if (((ss->if_flags & IFF_OACTIVE) == 0)
2217 && (!drbr_empty(ifp, tx->br))) {
2218 ss->if_flags |= IFF_OACTIVE;
2219 tx->stall++;
2223 static int
2224 mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
2226 mxge_softc_t *sc;
2227 struct ifnet *ifp;
2228 mxge_tx_ring_t *tx;
2229 int err;
2231 sc = ss->sc;
2232 ifp = sc->ifp;
2233 tx = &ss->tx;
2235 if ((ss->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
2236 IFF_RUNNING) {
2237 err = drbr_enqueue(ifp, tx->br, m);
2238 return (err);
2241 if (drbr_empty(ifp, tx->br) &&
2242 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
2243 /* let BPF see it */
2244 BPF_MTAP(ifp, m);
2245 /* give it to the nic */
2246 mxge_encap(ss, m);
2247 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
2248 return (err);
2250 if (!drbr_empty(ifp, tx->br))
2251 mxge_start_locked(ss);
2252 return (0);
2255 static int
2256 mxge_transmit(struct ifnet *ifp, struct mbuf *m)
2258 mxge_softc_t *sc = ifp->if_softc;
2259 struct mxge_slice_state *ss;
2260 mxge_tx_ring_t *tx;
2261 int err = 0;
2262 int slice = 0;	/* flowid dispatch below is disabled, default to slice 0 */
2264 #if 0
2265 slice = m->m_pkthdr.flowid;
2266 #endif
2267 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */
2269 ss = &sc->ss[slice];
2270 tx = &ss->tx;
2272 if(lwkt_serialize_try(ifp->if_serializer)) {
2273 err = mxge_transmit_locked(ss, m);
2274 lwkt_serialize_exit(ifp->if_serializer);
2275 } else {
2276 err = drbr_enqueue(ifp, tx->br, m);
2279 return (err);
2282 #else
2284 static inline void
2285 mxge_start_locked(struct mxge_slice_state *ss)
2287 mxge_softc_t *sc;
2288 struct mbuf *m;
2289 struct ifnet *ifp;
2290 mxge_tx_ring_t *tx;
2292 sc = ss->sc;
2293 ifp = sc->ifp;
2294 tx = &ss->tx;
2295 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
2296 m = ifq_dequeue(&ifp->if_snd, NULL);
2297 if (m == NULL) {
2298 return;
2300 /* let BPF see it */
2301 BPF_MTAP(ifp, m);
2303 /* give it to the nic */
2304 mxge_encap(ss, m);
2306 /* ran out of transmit slots */
2307 if ((sc->ifp->if_flags & IFF_OACTIVE) == 0) {
2308 sc->ifp->if_flags |= IFF_OACTIVE;
2309 tx->stall++;
2312 #endif
2313 static void
2314 mxge_start(struct ifnet *ifp)
2316 mxge_softc_t *sc = ifp->if_softc;
2317 struct mxge_slice_state *ss;
2319 ASSERT_SERIALIZED(sc->ifp->if_serializer);
2320 /* only use the first slice for now */
2321 ss = &sc->ss[0];
2322 mxge_start_locked(ss);
2326 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2327 * at most 32 bytes at a time, so as to avoid involving the software
2328 * pio handler in the nic. We re-write the first segment's low
2329 * DMA address to mark it valid only after we write the entire chunk
2330 * in a burst
2332 static inline void
2333 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
2334 mcp_kreq_ether_recv_t *src)
2336 uint32_t low;
2338 low = src->addr_low;
2339 src->addr_low = 0xffffffff;
2340 mxge_pio_copy(dst, src, 4 * sizeof (*src));
2341 wmb();
2342 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
2343 wmb();
2344 src->addr_low = low;
2345 dst->addr_low = low;
2346 wmb();
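/*
 * A note on the 0xffffffff dance above (a reading of the code, not a
 * comment from the source): the firmware treats an addr_low of
 * all-ones as "entry not yet valid".  Hiding the first entry before
 * the two 32-byte PIO bursts, and rewriting its real addr_low only
 * after the final copy, guarantees the NIC never consumes a
 * partially-written chunk of receive descriptors.
 */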
2349 static int
2350 mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2352 bus_dma_segment_t seg;
2353 struct mbuf *m;
2354 mxge_rx_ring_t *rx = &ss->rx_small;
2355 int cnt, err;
2357 m = m_gethdr(MB_DONTWAIT, MT_DATA);
2358 if (m == NULL) {
2359 rx->alloc_fail++;
2360 err = ENOBUFS;
2361 goto done;
2363 m->m_len = m->m_pkthdr.len = MHLEN;
2364 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2365 &seg, 1, &cnt, BUS_DMA_NOWAIT);
2366 if (err != 0) {
2367 kprintf("can't dmamap small (%d)\n", err);
2368 m_free(m);
2369 goto done;
2371 rx->info[idx].m = m;
2372 rx->shadow[idx].addr_low =
2373 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
2374 rx->shadow[idx].addr_high =
2375 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));
2377 done:
2378 if ((idx & 7) == 7)
2379 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
2380 return err;
2384 static int
2385 mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
2387 bus_dma_segment_t seg[3];
2388 struct mbuf *m;
2389 mxge_rx_ring_t *rx = &ss->rx_big;
2390 int cnt, err, i;
2392 if (rx->cl_size == MCLBYTES)
2393 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2394 else {
2395 #if 0
2396 m = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
2397 #else
2399 * XXX: allocate normal sized buffers for big buffers.
2400 * We should be fine as long as we don't get any jumbo frames
2402 m = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
2403 #endif
2405 if (m == NULL) {
2406 rx->alloc_fail++;
2407 err = ENOBUFS;
2408 goto done;
2410 m->m_pkthdr.len = 0;
2411 m->m_len = m->m_pkthdr.len = rx->mlen;
2412 err = bus_dmamap_load_mbuf_segment(rx->dmat, map, m,
2413 seg, 1, &cnt, BUS_DMA_NOWAIT);
2414 if (err != 0) {
2415 kprintf("can't dmamap big (%d)\n", err);
2416 m_free(m);
2417 goto done;
2419 rx->info[idx].m = m;
2420 rx->shadow[idx].addr_low =
2421 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
2422 rx->shadow[idx].addr_high =
2423 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
2425 #if MXGE_VIRT_JUMBOS
2426 for (i = 1; i < cnt; i++) {
2427 rx->shadow[idx + i].addr_low =
2428 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
2429 rx->shadow[idx + i].addr_high =
2430 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
2432 #endif
2434 done:
2435 for (i = 0; i < rx->nbufs; i++) {
2436 if ((idx & 7) == 7) {
2437 mxge_submit_8rx(&rx->lanai[idx - 7],
2438 &rx->shadow[idx - 7]);
2440 idx++;
2442 return err;
2446 * Myri10GE hardware checksums are not valid if the sender
2447 * padded the frame with non-zero padding. This is because
2448 * the firmware just does a simple 16-bit 1s complement
2449 * checksum across the entire frame, excluding the first 14
2450 bytes. It is best to simply check the checksum and
2451 * tell the stack about it only if the checksum is good
2454 static inline uint16_t
2455 mxge_rx_csum(struct mbuf *m, int csum)
2457 struct ether_header *eh;
2458 struct ip *ip;
2459 uint16_t c;
2461 eh = mtod(m, struct ether_header *);
2463 /* only deal with IPv4 TCP & UDP for now */
2464 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
2465 return 1;
2466 ip = (struct ip *)(eh + 1);
2467 if (__predict_false(ip->ip_p != IPPROTO_TCP &&
2468 ip->ip_p != IPPROTO_UDP))
2469 return 1;
2470 #ifdef INET
2471 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2472 htonl(ntohs(csum) + ntohs(ip->ip_len) -
2473 (ip->ip_hl << 2) + ip->ip_p));
2474 #else
2475 c = 1;
2476 #endif
2477 c ^= 0xffff;
2478 return (c);
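/*
 * A minimal sketch of the ones-complement fold used above -- a
 * hypothetical helper, not part of the driver: a frame whose TCP/UDP
 * checksum is correct sums to 0xffff once the pseudo header is
 * included, so the folded complement is 0, matching the "0 == good"
 * convention the callers of mxge_rx_csum() rely on.
 */
#if 0
static uint16_t
csum_fold(uint32_t sum)
{
	sum = (sum >> 16) + (sum & 0xffff);	/* fold carries into low 16 bits */
	sum += (sum >> 16);			/* one more carry is possible */
	return (~sum & 0xffff);			/* 0 when the checksum verifies */
}
#endif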
2481 static void
2482 mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
2484 struct ether_vlan_header *evl;
2485 struct ether_header *eh;
2486 uint32_t partial;
2488 evl = mtod(m, struct ether_vlan_header *);
2489 eh = mtod(m, struct ether_header *);
2492 * fix checksum by subtracting EVL_ENCAPLEN bytes
2493 * after what the firmware thought was the end of the ethernet
2494 * header.
2497 /* put checksum into host byte order */
2498 *csum = ntohs(*csum);
2499 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
2500 (*csum) += ~partial;
2501 (*csum) += ((*csum) < ~partial);
2502 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2503 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
2505 /* restore checksum to network byte order;
2506 later consumers expect this */
2507 *csum = htons(*csum);
2509 /* save the tag */
2510 #ifdef MXGE_NEW_VLAN_API
2511 m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
2512 #else
2514 struct m_tag *mtag;
2515 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
2516 MB_DONTWAIT);
2517 if (mtag == NULL)
2518 return;
2519 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
2520 m_tag_prepend(m, mtag);
2523 #endif
2524 m->m_flags |= M_VLANTAG;
2527 * Remove the 802.1q header by copying the Ethernet
2528 * addresses over it and adjusting the beginning of
2529 * the data in the mbuf. The encapsulated Ethernet
2530 * type field is already in place.
2532 bcopy((char *)evl, (char *)evl + EVL_ENCAPLEN,
2533 ETHER_HDR_LEN - ETHER_TYPE_LEN);
2534 m_adj(m, EVL_ENCAPLEN);
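/*
 * Ones-complement arithmetic note for the fixup above (illustrative):
 * adding ~partial is how one subtracts partial modulo 0xffff, and the
 * carry/fold lines push any overflow back into the low 16 bits.  For
 * example, *csum = 0x1234 and partial = 0x0001 gives 0x1234 +
 * 0xfffffffe, which wraps to 0x1232; the end-around carry adds 1,
 * yielding 0x1233 = 0x1234 - 0x0001.
 */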
2538 static inline void
2539 mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2541 mxge_softc_t *sc;
2542 struct ifnet *ifp;
2543 struct mbuf *m;
2544 struct ether_header *eh;
2545 mxge_rx_ring_t *rx;
2546 bus_dmamap_t old_map;
2547 int idx;
2548 uint16_t tcpudp_csum;
2550 sc = ss->sc;
2551 ifp = sc->ifp;
2552 rx = &ss->rx_big;
2553 idx = rx->cnt & rx->mask;
2554 rx->cnt += rx->nbufs;
2555 /* save a pointer to the received mbuf */
2556 m = rx->info[idx].m;
2557 /* try to replace the received mbuf */
2558 if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
2559 /* drop the frame -- the old mbuf is re-cycled */
2560 ifp->if_ierrors++;
2561 return;
2564 /* unmap the received buffer */
2565 old_map = rx->info[idx].map;
2566 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2567 bus_dmamap_unload(rx->dmat, old_map);
2569 /* swap the bus_dmamap_t's */
2570 rx->info[idx].map = rx->extra_map;
2571 rx->extra_map = old_map;
2573 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2574 * aligned */
2575 m->m_data += MXGEFW_PAD;
2577 m->m_pkthdr.rcvif = ifp;
2578 m->m_len = m->m_pkthdr.len = len;
2579 ss->ipackets++;
2580 eh = mtod(m, struct ether_header *);
2581 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2582 mxge_vlan_tag_remove(m, &csum);
2584 /* if the checksum is valid, mark it in the mbuf header */
2585 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2586 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2587 return;
2588 /* otherwise, it was a UDP frame, or a TCP frame which
2589 we could not do LRO on. Tell the stack that the
2590 checksum is good */
2591 m->m_pkthdr.csum_data = 0xffff;
2592 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2594 #if 0
2595 /* flowid only valid if RSS hashing is enabled */
2596 if (sc->num_slices > 1) {
2597 m->m_pkthdr.flowid = (ss - sc->ss);
2598 m->m_flags |= M_FLOWID;
2600 #endif
2601 /* pass the frame up the stack */
2602 (*ifp->if_input)(ifp, m);
2605 static inline void
2606 mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
2608 mxge_softc_t *sc;
2609 struct ifnet *ifp;
2610 struct ether_header *eh;
2611 struct mbuf *m;
2612 mxge_rx_ring_t *rx;
2613 bus_dmamap_t old_map;
2614 int idx;
2615 uint16_t tcpudp_csum;
2617 sc = ss->sc;
2618 ifp = sc->ifp;
2619 rx = &ss->rx_small;
2620 idx = rx->cnt & rx->mask;
2621 rx->cnt++;
2622 /* save a pointer to the received mbuf */
2623 m = rx->info[idx].m;
2624 /* try to replace the received mbuf */
2625 if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
2626 /* drop the frame -- the old mbuf is re-cycled */
2627 ifp->if_ierrors++;
2628 return;
2631 /* unmap the received buffer */
2632 old_map = rx->info[idx].map;
2633 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
2634 bus_dmamap_unload(rx->dmat, old_map);
2636 /* swap the bus_dmamap_t's */
2637 rx->info[idx].map = rx->extra_map;
2638 rx->extra_map = old_map;
2640 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2641 * aligned */
2642 m->m_data += MXGEFW_PAD;
2644 m->m_pkthdr.rcvif = ifp;
2645 m->m_len = m->m_pkthdr.len = len;
2646 ss->ipackets++;
2647 eh = mtod(m, struct ether_header *);
2648 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2649 mxge_vlan_tag_remove(m, &csum);
2651 /* if the checksum is valid, mark it in the mbuf header */
2652 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
2653 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
2654 return;
2655 /* otherwise, it was a UDP frame, or a TCP frame which
2656 we could not do LRO on. Tell the stack that the
2657 checksum is good */
2658 m->m_pkthdr.csum_data = 0xffff;
2659 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
2661 #if 0
2662 /* flowid only valid if RSS hashing is enabled */
2663 if (sc->num_slices > 1) {
2664 m->m_pkthdr.flowid = (ss - sc->ss);
2665 m->m_flags |= M_FLOWID;
2667 #endif
2668 /* pass the frame up the stack */
2669 (*ifp->if_input)(ifp, m);
2672 static inline void
2673 mxge_clean_rx_done(struct mxge_slice_state *ss)
2675 mxge_rx_done_t *rx_done = &ss->rx_done;
2676 int limit = 0;
2677 uint16_t length;
2678 uint16_t checksum;
2681 while (rx_done->entry[rx_done->idx].length != 0) {
2682 length = ntohs(rx_done->entry[rx_done->idx].length);
2683 rx_done->entry[rx_done->idx].length = 0;
2684 checksum = rx_done->entry[rx_done->idx].checksum;
2685 if (length <= (MHLEN - MXGEFW_PAD))
2686 mxge_rx_done_small(ss, length, checksum);
2687 else
2688 mxge_rx_done_big(ss, length, checksum);
2689 rx_done->cnt++;
2690 rx_done->idx = rx_done->cnt & rx_done->mask;
2692 /* limit potential for livelock */
2693 if (__predict_false(++limit > rx_done->mask / 2))
2694 break;
2696 #ifdef INET
2697 while (!SLIST_EMPTY(&ss->lro_active)) {
2698 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
2699 SLIST_REMOVE_HEAD(&ss->lro_active, next);
2700 mxge_lro_flush(ss, lro);
2702 #endif
2706 static inline void
2707 mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
2709 struct ifnet *ifp;
2710 mxge_tx_ring_t *tx;
2711 struct mbuf *m;
2712 bus_dmamap_t map;
2713 int idx;
2714 int *flags;
2716 tx = &ss->tx;
2717 ifp = ss->sc->ifp;
2718 ASSERT_SERIALIZED(ifp->if_serializer);
2719 while (tx->pkt_done != mcp_idx) {
2720 idx = tx->done & tx->mask;
2721 tx->done++;
2722 m = tx->info[idx].m;
2723 /* mbuf and DMA map only attached to the first
2724 segment per-mbuf */
2725 if (m != NULL) {
2726 ss->obytes += m->m_pkthdr.len;
2727 if (m->m_flags & M_MCAST)
2728 ss->omcasts++;
2729 ss->opackets++;
2730 tx->info[idx].m = NULL;
2731 map = tx->info[idx].map;
2732 bus_dmamap_unload(tx->dmat, map);
2733 m_freem(m);
2735 if (tx->info[idx].flag) {
2736 tx->info[idx].flag = 0;
2737 tx->pkt_done++;
2741 /* If we have space, clear IFF_OACTIVE to tell the stack that
2742 it's OK to send packets */
2743 #ifdef IFNET_BUF_RING
2744 flags = &ss->if_flags;
2745 #else
2746 flags = &ifp->if_flags;
2747 #endif
2748 if ((*flags) & IFF_OACTIVE &&
2749 tx->req - tx->done < (tx->mask + 1)/4) {
2750 *(flags) &= ~IFF_OACTIVE;
2751 ss->tx.wake++;
2752 mxge_start_locked(ss);
2754 #ifdef IFNET_BUF_RING
2755 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
2756 /* let the NIC stop polling this queue, since there
2757 * are no more transmits pending */
2758 if (tx->req == tx->done) {
2759 *tx->send_stop = 1;
2760 tx->queue_active = 0;
2761 tx->deactivate++;
2762 wmb();
2765 #endif
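/*
 * The send_go/send_stop writes above and in mxge_encap() form a simple
 * doorbell protocol (a reading of the code): with multiple slices the
 * NIC polls a TX ring only between a 1 written to *send_go and a 1
 * written to *send_stop, so the driver activates the queue on the
 * first pending transmit and deactivates it once req == done.
 */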
2769 static struct mxge_media_type mxge_xfp_media_types[] =
2771 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"},
2772 {IFM_10G_SR, (1 << 7), "10GBASE-SR"},
2773 {IFM_10G_LR, (1 << 6), "10GBASE-LR"},
2774 {0, (1 << 5), "10GBASE-ER"},
2775 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"},
2776 {0, (1 << 3), "10GBASE-SW"},
2777 {0, (1 << 2), "10GBASE-LW"},
2778 {0, (1 << 1), "10GBASE-EW"},
2779 {0, (1 << 0), "Reserved"}
2781 static struct mxge_media_type mxge_sfp_media_types[] =
2783 {0, (1 << 7), "Reserved"},
2784 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"},
2785 {IFM_10G_LR, (1 << 5), "10GBASE-LR"},
2786 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}
2789 static void
2790 mxge_set_media(mxge_softc_t *sc, int type)
2792 sc->media_flags |= type;
2793 ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
2794 ifmedia_set(&sc->media, sc->media_flags);
2799 * Determine the media type for a NIC. Some XFPs will identify
2800 * themselves only when their link is up, so this is initiated via a
2801 * link up interrupt. However, this can potentially take up to
2802 * several milliseconds, so it is run via the watchdog routine, rather
2803 * than in the interrupt handler itself. This need only be done
2804 * once, not each time the link is up.
2806 static void
2807 mxge_media_probe(mxge_softc_t *sc)
2809 mxge_cmd_t cmd;
2810 char *cage_type;
2811 char *ptr;
2812 struct mxge_media_type *mxge_media_types = NULL;
2813 int i, err, ms, mxge_media_type_entries;
2814 uint32_t byte;
2816 sc->need_media_probe = 0;
2818 /* if we've already set a media type, we're done */
2819 if (sc->media_flags != (IFM_ETHER | IFM_AUTO))
2820 return;
2823 * parse the product code to determine the interface type
2824 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2825 * after the 3rd dash in the driver's cached copy of the
2826 * EEPROM's product code string.
2828 ptr = sc->product_code_string;
2829 if (ptr == NULL) {
2830 device_printf(sc->dev, "Missing product code\n");
2833 for (i = 0; i < 3; i++, ptr++) {
2834 ptr = index(ptr, '-');
2835 if (ptr == NULL) {
2836 device_printf(sc->dev,
2837 "only %d dashes in PC?!?\n", i);
2838 return;
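	/*
	 * Illustrative only: with a product code along the lines of
	 * "10G-PCIE-8B-S" (a hypothetical example), ptr would now point
	 * at the 'S' following the third dash, selecting the SFP+
	 * tables in the checks below.
	 */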
2841 if (*ptr == 'C') {
2842 /* -C is CX4 */
2843 mxge_set_media(sc, IFM_10G_CX4);
2844 return;
2846 else if (*ptr == 'Q') {
2847 /* -Q is Quad Ribbon Fiber */
2848 device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
2849 /* FreeBSD has no media type for Quad ribbon fiber */
2850 return;
2853 if (*ptr == 'R') {
2854 /* -R is XFP */
2855 mxge_media_types = mxge_xfp_media_types;
2856 mxge_media_type_entries =
2857 sizeof (mxge_xfp_media_types) /
2858 sizeof (mxge_xfp_media_types[0]);
2859 byte = MXGE_XFP_COMPLIANCE_BYTE;
2860 cage_type = "XFP";
2863 if (*ptr == 'S' || *(ptr +1) == 'S') {
2864 /* -S or -2S is SFP+ */
2865 mxge_media_types = mxge_sfp_media_types;
2866 mxge_media_type_entries =
2867 sizeof (mxge_sfp_media_types) /
2868 sizeof (mxge_sfp_media_types[0]);
2869 cage_type = "SFP+";
2870 byte = 3;
2873 if (mxge_media_types == NULL) {
2874 device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
2875 return;
2879 * At this point we know the NIC has an XFP or SFP+ cage, so now
2880 * we try to determine what is in the cage by using the
2881 * firmware's I2C commands to read the module's 10GbE compliance
2882 * register. We read just one byte, which may take over
2883 * a millisecond
2886 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */
2887 cmd.data1 = byte;
2888 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
2889 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
2890 device_printf(sc->dev, "failed to read XFP\n");
2892 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
2893 device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
2895 if (err != MXGEFW_CMD_OK) {
2896 return;
2899 /* now we wait for the data to be cached */
2900 cmd.data0 = byte;
2901 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2902 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
2903 DELAY(1000);
2904 cmd.data0 = byte;
2905 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
2907 if (err != MXGEFW_CMD_OK) {
2908 device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
2909 cage_type, err, ms);
2910 return;
2913 if (cmd.data0 == mxge_media_types[0].bitmask) {
2914 if (mxge_verbose)
2915 device_printf(sc->dev, "%s:%s\n", cage_type,
2916 mxge_media_types[0].name);
2917 mxge_set_media(sc, IFM_10G_CX4);
2918 return;
2920 for (i = 1; i < mxge_media_type_entries; i++) {
2921 if (cmd.data0 & mxge_media_types[i].bitmask) {
2922 if (mxge_verbose)
2923 device_printf(sc->dev, "%s:%s\n",
2924 cage_type,
2925 mxge_media_types[i].name);
2927 mxge_set_media(sc, mxge_media_types[i].flag);
2928 return;
2931 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
2932 cmd.data0);
2934 return;
2937 static void
2938 mxge_intr(void *arg)
2940 struct mxge_slice_state *ss = arg;
2941 mxge_softc_t *sc = ss->sc;
2942 mcp_irq_data_t *stats = ss->fw_stats;
2943 mxge_tx_ring_t *tx = &ss->tx;
2944 mxge_rx_done_t *rx_done = &ss->rx_done;
2945 uint32_t send_done_count;
2946 uint8_t valid;
2949 #ifndef IFNET_BUF_RING
2950 /* an interrupt on a non-zero slice is implicitly valid
2951 since MSI-X irqs are not shared */
2952 if (ss != sc->ss) {
2953 mxge_clean_rx_done(ss);
2954 *ss->irq_claim = be32toh(3);
2955 return;
2957 #endif
2959 /* make sure the DMA has finished */
2960 if (!stats->valid) {
2961 return;
2963 valid = stats->valid;
2965 if (sc->legacy_irq) {
2966 /* lower legacy IRQ */
2967 *sc->irq_deassert = 0;
2968 if (!mxge_deassert_wait)
2969 /* don't wait for conf. that irq is low */
2970 stats->valid = 0;
2971 } else {
2972 stats->valid = 0;
2975 /* loop while waiting for legacy irq deassertion */
2976 do {
2977 /* check for transmit completes and receives */
2978 send_done_count = be32toh(stats->send_done_count);
2979 while ((send_done_count != tx->pkt_done) ||
2980 (rx_done->entry[rx_done->idx].length != 0)) {
2981 if (send_done_count != tx->pkt_done)
2982 mxge_tx_done(ss, (int)send_done_count);
2983 mxge_clean_rx_done(ss);
2984 send_done_count = be32toh(stats->send_done_count);
2986 if (sc->legacy_irq && mxge_deassert_wait)
2987 wmb();
2988 } while (*((volatile uint8_t *) &stats->valid));
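	/*
	 * A note on the loop above (a reading of the code): with a
	 * level-triggered legacy INTx, the handler keeps draining TX
	 * completions and RX descriptors until the NIC confirms -- by
	 * DMAing stats->valid back to 0 -- that it has seen the
	 * deassert write; otherwise the still-asserted line would
	 * immediately re-enter the handler.
	 */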
2990 /* fw link & error stats meaningful only on the first slice */
2991 if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
2992 if (sc->link_state != stats->link_up) {
2993 sc->link_state = stats->link_up;
2994 if (sc->link_state) {
2995 sc->ifp->if_link_state = LINK_STATE_UP;
2996 if_link_state_change(sc->ifp);
2997 if (mxge_verbose)
2998 device_printf(sc->dev, "link up\n");
2999 } else {
3000 sc->ifp->if_link_state = LINK_STATE_DOWN;
3001 if_link_state_change(sc->ifp);
3002 if (mxge_verbose)
3003 device_printf(sc->dev, "link down\n");
3005 sc->need_media_probe = 1;
3007 if (sc->rdma_tags_available !=
3008 be32toh(stats->rdma_tags_available)) {
3009 sc->rdma_tags_available =
3010 be32toh(stats->rdma_tags_available);
3011 device_printf(sc->dev, "RDMA timed out! %d tags "
3012 "left\n", sc->rdma_tags_available);
3015 if (stats->link_down) {
3016 sc->down_cnt += stats->link_down;
3017 sc->link_state = 0;
3018 sc->ifp->if_link_state = LINK_STATE_DOWN;
3019 if_link_state_change(sc->ifp);
3023 /* check to see if we have an rx token to pass back */
3024 if (valid & 0x1)
3025 *ss->irq_claim = be32toh(3);
3026 *(ss->irq_claim + 1) = be32toh(3);
3029 static void
3030 mxge_init(void *arg)
3036 static void
3037 mxge_free_slice_mbufs(struct mxge_slice_state *ss)
3039 struct lro_entry *lro_entry;
3040 int i;
3042 while (!SLIST_EMPTY(&ss->lro_free)) {
3043 lro_entry = SLIST_FIRST(&ss->lro_free);
3044 SLIST_REMOVE_HEAD(&ss->lro_free, next);
3045 kfree(lro_entry, M_DEVBUF);
3048 for (i = 0; i <= ss->rx_big.mask; i++) {
3049 if (ss->rx_big.info[i].m == NULL)
3050 continue;
3051 bus_dmamap_unload(ss->rx_big.dmat,
3052 ss->rx_big.info[i].map);
3053 m_freem(ss->rx_big.info[i].m);
3054 ss->rx_big.info[i].m = NULL;
3057 for (i = 0; i <= ss->rx_small.mask; i++) {
3058 if (ss->rx_small.info[i].m == NULL)
3059 continue;
3060 bus_dmamap_unload(ss->rx_small.dmat,
3061 ss->rx_small.info[i].map);
3062 m_freem(ss->rx_small.info[i].m);
3063 ss->rx_small.info[i].m = NULL;
3066 /* transmit ring used only on the first slice */
3067 if (ss->tx.info == NULL)
3068 return;
3070 for (i = 0; i <= ss->tx.mask; i++) {
3071 ss->tx.info[i].flag = 0;
3072 if (ss->tx.info[i].m == NULL)
3073 continue;
3074 bus_dmamap_unload(ss->tx.dmat,
3075 ss->tx.info[i].map);
3076 m_freem(ss->tx.info[i].m);
3077 ss->tx.info[i].m = NULL;
3081 static void
3082 mxge_free_mbufs(mxge_softc_t *sc)
3084 int slice;
3086 for (slice = 0; slice < sc->num_slices; slice++)
3087 mxge_free_slice_mbufs(&sc->ss[slice]);
3090 static void
3091 mxge_free_slice_rings(struct mxge_slice_state *ss)
3093 int i;
3096 if (ss->rx_done.entry != NULL)
3097 mxge_dma_free(&ss->rx_done.dma);
3098 ss->rx_done.entry = NULL;
3100 if (ss->tx.req_bytes != NULL)
3101 kfree(ss->tx.req_bytes, M_DEVBUF);
3102 ss->tx.req_bytes = NULL;
3104 if (ss->tx.seg_list != NULL)
3105 kfree(ss->tx.seg_list, M_DEVBUF);
3106 ss->tx.seg_list = NULL;
3108 if (ss->rx_small.shadow != NULL)
3109 kfree(ss->rx_small.shadow, M_DEVBUF);
3110 ss->rx_small.shadow = NULL;
3112 if (ss->rx_big.shadow != NULL)
3113 kfree(ss->rx_big.shadow, M_DEVBUF);
3114 ss->rx_big.shadow = NULL;
3116 if (ss->tx.info != NULL) {
3117 if (ss->tx.dmat != NULL) {
3118 for (i = 0; i <= ss->tx.mask; i++) {
3119 bus_dmamap_destroy(ss->tx.dmat,
3120 ss->tx.info[i].map);
3122 bus_dma_tag_destroy(ss->tx.dmat);
3124 kfree(ss->tx.info, M_DEVBUF);
3126 ss->tx.info = NULL;
3128 if (ss->rx_small.info != NULL) {
3129 if (ss->rx_small.dmat != NULL) {
3130 for (i = 0; i <= ss->rx_small.mask; i++) {
3131 bus_dmamap_destroy(ss->rx_small.dmat,
3132 ss->rx_small.info[i].map);
3134 bus_dmamap_destroy(ss->rx_small.dmat,
3135 ss->rx_small.extra_map);
3136 bus_dma_tag_destroy(ss->rx_small.dmat);
3138 kfree(ss->rx_small.info, M_DEVBUF);
3140 ss->rx_small.info = NULL;
3142 if (ss->rx_big.info != NULL) {
3143 if (ss->rx_big.dmat != NULL) {
3144 for (i = 0; i <= ss->rx_big.mask; i++) {
3145 bus_dmamap_destroy(ss->rx_big.dmat,
3146 ss->rx_big.info[i].map);
3148 bus_dmamap_destroy(ss->rx_big.dmat,
3149 ss->rx_big.extra_map);
3150 bus_dma_tag_destroy(ss->rx_big.dmat);
3152 kfree(ss->rx_big.info, M_DEVBUF);
3154 ss->rx_big.info = NULL;
3157 static void
3158 mxge_free_rings(mxge_softc_t *sc)
3160 int slice;
3162 for (slice = 0; slice < sc->num_slices; slice++)
3163 mxge_free_slice_rings(&sc->ss[slice]);
3166 static int
3167 mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
3168 int tx_ring_entries)
3170 mxge_softc_t *sc = ss->sc;
3171 size_t bytes;
3172 int err, i;
3174 err = ENOMEM;
3176 /* allocate per-slice receive resources */
3178 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
3179 ss->rx_done.mask = (2 * rx_ring_entries) - 1;
3181 /* allocate the rx shadow rings */
3182 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
3183 ss->rx_small.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3184 if (ss->rx_small.shadow == NULL)
3185 return err;
3187 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
3188 ss->rx_big.shadow = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3189 if (ss->rx_big.shadow == NULL)
3190 return err;
3192 /* allocate the rx host info rings */
3193 bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
3194 ss->rx_small.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3195 if (ss->rx_small.info == NULL)
3196 return err;
3198 bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
3199 ss->rx_big.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3200 if (ss->rx_big.info == NULL)
3201 return err;
3203 /* allocate the rx busdma resources */
3204 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3205 1, /* alignment */
3206 4096, /* boundary */
3207 BUS_SPACE_MAXADDR, /* low */
3208 BUS_SPACE_MAXADDR, /* high */
3209 NULL, NULL, /* filter */
3210 MHLEN, /* maxsize */
3211 1, /* num segs */
3212 MHLEN, /* maxsegsize */
3213 BUS_DMA_ALLOCNOW, /* flags */
3214 &ss->rx_small.dmat); /* tag */
3215 if (err != 0) {
3216 device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
3217 err);
3218 return err;
3221 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3222 1, /* alignment */
3223 #if MXGE_VIRT_JUMBOS
3224 4096, /* boundary */
3225 #else
3226 0, /* boundary */
3227 #endif
3228 BUS_SPACE_MAXADDR, /* low */
3229 BUS_SPACE_MAXADDR, /* high */
3230 NULL, NULL, /* filter */
3231 3*4096, /* maxsize */
3232 #if MXGE_VIRT_JUMBOS
3233 3, /* num segs */
3234 4096, /* maxsegsize*/
3235 #else
3236 1, /* num segs */
3237 MJUM9BYTES, /* maxsegsize*/
3238 #endif
3239 BUS_DMA_ALLOCNOW, /* flags */
3240 &ss->rx_big.dmat); /* tag */
3241 if (err != 0) {
3242 device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
3243 err);
3244 return err;
3246 for (i = 0; i <= ss->rx_small.mask; i++) {
3247 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3248 &ss->rx_small.info[i].map);
3249 if (err != 0) {
3250 device_printf(sc->dev, "Err %d rx_small dmamap\n",
3251 err);
3252 return err;
3255 err = bus_dmamap_create(ss->rx_small.dmat, 0,
3256 &ss->rx_small.extra_map);
3257 if (err != 0) {
3258 device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
3259 err);
3260 return err;
3263 for (i = 0; i <= ss->rx_big.mask; i++) {
3264 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3265 &ss->rx_big.info[i].map);
3266 if (err != 0) {
3267 device_printf(sc->dev, "Err %d rx_big dmamap\n",
3268 err);
3269 return err;
3272 err = bus_dmamap_create(ss->rx_big.dmat, 0,
3273 &ss->rx_big.extra_map);
3274 if (err != 0) {
3275 device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
3276 err);
3277 return err;
3280 /* now allocate TX resources */
3282 #ifndef IFNET_BUF_RING
3283 /* only use a single TX ring for now */
3284 if (ss != ss->sc->ss)
3285 return 0;
3286 #endif
3288 ss->tx.mask = tx_ring_entries - 1;
3289 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);
3292 /* allocate the tx request copy block */
3293 bytes = 8 +
3294 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
3295 ss->tx.req_bytes = kmalloc(bytes, M_DEVBUF, M_WAITOK);
3296 if (ss->tx.req_bytes == NULL)
3297 return err;
3298 /* ensure req_list entries are aligned to 8 bytes */
3299 ss->tx.req_list = (mcp_kreq_ether_send_t *)
3300 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);
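	/*
	 * The "+ 7 then & ~7" above is the usual round-up-to-8 idiom;
	 * e.g. a req_bytes of 0x1003 yields (0x1003 + 7) & ~7 = 0x1008.
	 * The 8 spare bytes added to the allocation cover the
	 * worst-case shift.
	 */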
3302 /* allocate the tx busdma segment list */
3303 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
3304 ss->tx.seg_list = (bus_dma_segment_t *)
3305 kmalloc(bytes, M_DEVBUF, M_WAITOK);
3306 if (ss->tx.seg_list == NULL)
3307 return err;
3309 /* allocate the tx host info ring */
3310 bytes = tx_ring_entries * sizeof (*ss->tx.info);
3311 ss->tx.info = kmalloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
3312 if (ss->tx.info == NULL)
3313 return err;
3315 /* allocate the tx busdma resources */
3316 err = bus_dma_tag_create(sc->parent_dmat, /* parent */
3317 1, /* alignment */
3318 sc->tx_boundary, /* boundary */
3319 BUS_SPACE_MAXADDR, /* low */
3320 BUS_SPACE_MAXADDR, /* high */
3321 NULL, NULL, /* filter */
3322 65536 + 256, /* maxsize */
3323 ss->tx.max_desc - 2, /* num segs */
3324 sc->tx_boundary, /* maxsegsz */
3325 BUS_DMA_ALLOCNOW, /* flags */
3326 &ss->tx.dmat); /* tag */
3328 if (err != 0) {
3329 device_printf(sc->dev, "Err %d allocating tx dmat\n",
3330 err);
3331 return err;
3334 /* now use these tags to setup dmamaps for each slot
3335 in the ring */
3336 for (i = 0; i <= ss->tx.mask; i++) {
3337 err = bus_dmamap_create(ss->tx.dmat, 0,
3338 &ss->tx.info[i].map);
3339 if (err != 0) {
3340 device_printf(sc->dev, "Err %d tx dmamap\n",
3341 err);
3342 return err;
3345 return 0;
3349 static int
3350 mxge_alloc_rings(mxge_softc_t *sc)
3352 mxge_cmd_t cmd;
3353 int tx_ring_size;
3354 int tx_ring_entries, rx_ring_entries;
3355 int err, slice;
3357 /* get ring sizes */
3358 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
3359 tx_ring_size = cmd.data0;
3360 if (err != 0) {
3361 device_printf(sc->dev, "Cannot determine tx ring sizes\n");
3362 goto abort;
3365 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
3366 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
3367 ifq_set_maxlen(&sc->ifp->if_snd, tx_ring_entries - 1);
3368 ifq_set_ready(&sc->ifp->if_snd);
3370 for (slice = 0; slice < sc->num_slices; slice++) {
3371 err = mxge_alloc_slice_rings(&sc->ss[slice],
3372 rx_ring_entries,
3373 tx_ring_entries);
3374 if (err != 0)
3375 goto abort;
3377 return 0;
3379 abort:
3380 mxge_free_rings(sc);
3381 return err;
3386 static void
3387 mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
3389 int bufsize = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN + MXGEFW_PAD;
3391 if (bufsize < MCLBYTES) {
3392 /* easy, everything fits in a single buffer */
3393 *big_buf_size = MCLBYTES;
3394 *cl_size = MCLBYTES;
3395 *nbufs = 1;
3396 return;
3399 if (bufsize < MJUMPAGESIZE) {
3400 /* still easy, everything still fits in a single buffer */
3401 *big_buf_size = MJUMPAGESIZE;
3402 *cl_size = MJUMPAGESIZE;
3403 *nbufs = 1;
3404 return;
3406 #if MXGE_VIRT_JUMBOS
3407 /* now we need to use virtually contiguous buffers */
3408 *cl_size = MJUM9BYTES;
3409 *big_buf_size = 4096;
3410 *nbufs = mtu / 4096 + 1;
3411 /* needs to be a power of two, so round up */
3412 if (*nbufs == 3)
3413 *nbufs = 4;
3414 #else
3415 *cl_size = MJUM9BYTES;
3416 *big_buf_size = MJUM9BYTES;
3417 *nbufs = 1;
3418 #endif
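/*
 * Worked examples for the selection above (assuming 2KB MCLBYTES and
 * 4KB pages): a 1500-byte MTU needs 1500 + 14 + 4 + 2 = 1520 bytes,
 * which fits a single 2KB cluster; a 9000-byte MTU needs 9020 bytes
 * and falls through to the 9KB cluster (or, with MXGE_VIRT_JUMBOS,
 * the multi-buffer) case.
 */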
3421 static int
3422 mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
3424 mxge_softc_t *sc;
3425 mxge_cmd_t cmd;
3426 bus_dmamap_t map;
3427 struct lro_entry *lro_entry;
3428 int err, i, slice;
3431 sc = ss->sc;
3432 slice = ss - sc->ss;
3434 SLIST_INIT(&ss->lro_free);
3435 SLIST_INIT(&ss->lro_active);
3437 for (i = 0; i < sc->lro_cnt; i++) {
3438 lro_entry = (struct lro_entry *)
3439 kmalloc(sizeof (*lro_entry), M_DEVBUF,
3440 M_NOWAIT | M_ZERO);
3441 if (lro_entry == NULL) {
3442 sc->lro_cnt = i;
3443 break;
3445 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
3447 /* get the lanai pointers to the send and receive rings */
3449 err = 0;
3450 #ifndef IFNET_BUF_RING
3451 /* We currently only send from the first slice */
3452 if (slice == 0) {
3453 #endif
3454 cmd.data0 = slice;
3455 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
3456 ss->tx.lanai =
3457 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
3458 ss->tx.send_go = (volatile uint32_t *)
3459 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
3460 ss->tx.send_stop = (volatile uint32_t *)
3461 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
3462 #ifndef IFNET_BUF_RING
3464 #endif
3465 cmd.data0 = slice;
3466 err |= mxge_send_cmd(sc,
3467 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
3468 ss->rx_small.lanai =
3469 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3470 cmd.data0 = slice;
3471 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
3472 ss->rx_big.lanai =
3473 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
3475 if (err != 0) {
3476 device_printf(sc->dev,
3477 "failed to get ring sizes or locations\n");
3478 return EIO;
3481 /* stock receive rings */
3482 for (i = 0; i <= ss->rx_small.mask; i++) {
3483 map = ss->rx_small.info[i].map;
3484 err = mxge_get_buf_small(ss, map, i);
3485 if (err) {
3486 device_printf(sc->dev, "alloced %d/%d smalls\n",
3487 i, ss->rx_small.mask + 1);
3488 return ENOMEM;
3491 for (i = 0; i <= ss->rx_big.mask; i++) {
3492 ss->rx_big.shadow[i].addr_low = 0xffffffff;
3493 ss->rx_big.shadow[i].addr_high = 0xffffffff;
3495 ss->rx_big.nbufs = nbufs;
3496 ss->rx_big.cl_size = cl_size;
3497 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
3498 EVL_ENCAPLEN + MXGEFW_PAD;
3499 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
3500 map = ss->rx_big.info[i].map;
3501 err = mxge_get_buf_big(ss, map, i);
3502 if (err) {
3503 device_printf(sc->dev, "alloced %d/%d bigs\n",
3504 i, ss->rx_big.mask + 1);
3505 return ENOMEM;
3508 return 0;
3511 static int
3512 mxge_open(mxge_softc_t *sc)
3514 mxge_cmd_t cmd;
3515 int err, big_bytes, nbufs, slice, cl_size, i;
3516 bus_addr_t bus;
3517 volatile uint8_t *itable;
3518 struct mxge_slice_state *ss;
3520 ASSERT_SERIALIZED(sc->ifp->if_serializer);
3521 /* Copy the MAC address in case it was overridden */
3522 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);
3524 err = mxge_reset(sc, 1);
3525 if (err != 0) {
3526 device_printf(sc->dev, "failed to reset\n");
3527 return EIO;
3530 if (sc->num_slices > 1) {
3531 /* setup the indirection table */
3532 cmd.data0 = sc->num_slices;
3533 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
3534 &cmd);
3536 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
3537 &cmd);
3538 if (err != 0) {
3539 device_printf(sc->dev,
3540 "failed to setup rss tables\n");
3541 return err;
3544 /* just enable an identity mapping */
3545 itable = sc->sram + cmd.data0;
3546 for (i = 0; i < sc->num_slices; i++)
3547 itable[i] = (uint8_t)i;
3549 cmd.data0 = 1;
3550 cmd.data1 = mxge_rss_hash_type;
3551 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
3552 if (err != 0) {
3553 device_printf(sc->dev, "failed to enable slices\n");
3554 return err;
3559 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);
3561 cmd.data0 = nbufs;
3562 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
3563 &cmd);
3564 /* error is only meaningful if we're trying to set
3565 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3566 if (err && nbufs > 1) {
3567 device_printf(sc->dev,
3568 "Failed to set alway-use-n to %d\n",
3569 nbufs);
3570 return EIO;
3572 /* Give the firmware the mtu and the big and small buffer
3573 sizes. The firmware wants the big buf size to be a power
3574 of two. Luckily, FreeBSD's clusters are powers of two */
3575 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3576 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
3577 cmd.data0 = MHLEN - MXGEFW_PAD;
3578 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
3579 &cmd);
3580 cmd.data0 = big_bytes;
3581 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);
3583 if (err != 0) {
3584 device_printf(sc->dev, "failed to setup params\n");
3585 goto abort;
3588 /* Now give the firmware the pointer to the stats block */
3589 for (slice = 0;
3590 #ifdef IFNET_BUF_RING
3591 slice < sc->num_slices;
3592 #else
3593 slice < 1;
3594 #endif
3595 slice++) {
3596 ss = &sc->ss[slice];
3597 cmd.data0 =
3598 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
3599 cmd.data1 =
3600 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
3601 cmd.data2 = sizeof(struct mcp_irq_data);
3602 cmd.data2 |= (slice << 16);
3603 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
3606 if (err != 0) {
3607 bus = sc->ss->fw_stats_dma.bus_addr;
3608 bus += offsetof(struct mcp_irq_data, send_done_count);
3609 cmd.data0 = MXGE_LOWPART_TO_U32(bus);
3610 cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
3611 err = mxge_send_cmd(sc,
3612 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
3613 &cmd);
3614 /* Firmware cannot support multicast without STATS_DMA_V2 */
3615 sc->fw_multicast_support = 0;
3616 } else {
3617 sc->fw_multicast_support = 1;
3620 if (err != 0) {
3621 device_printf(sc->dev, "failed to setup params\n");
3622 goto abort;
3625 for (slice = 0; slice < sc->num_slices; slice++) {
3626 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
3627 if (err != 0) {
3628 device_printf(sc->dev, "couldn't open slice %d\n",
3629 slice);
3630 goto abort;
3634 /* Finally, start the firmware running */
3635 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
3636 if (err) {
3637 device_printf(sc->dev, "Couldn't bring up link\n");
3638 goto abort;
3640 #ifdef IFNET_BUF_RING
3641 for (slice = 0; slice < sc->num_slices; slice++) {
3642 ss = &sc->ss[slice];
3643 ss->if_flags |= IFF_RUNNING;
3644 ss->if_flags &= ~IFF_OACTIVE;
3646 #endif
3647 sc->ifp->if_flags |= IFF_RUNNING;
3648 sc->ifp->if_flags &= ~IFF_OACTIVE;
3649 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3651 return 0;
3654 abort:
3655 mxge_free_mbufs(sc);
3657 return err;
3660 static int
3661 mxge_close(mxge_softc_t *sc)
3663 mxge_cmd_t cmd;
3664 int err, old_down_cnt;
3665 #ifdef IFNET_BUF_RING
3666 struct mxge_slice_state *ss;
3667 int slice;
3668 #endif
3670 ASSERT_SERIALIZED(sc->ifp->if_serializer);
3671 callout_stop(&sc->co_hdl);
3672 #ifdef IFNET_BUF_RING
3673 for (slice = 0; slice < sc->num_slices; slice++) {
3674 ss = &sc->ss[slice];
3675 ss->if_flags &= ~IFF_RUNNING;
3677 #endif
3678 sc->ifp->if_flags &= ~IFF_RUNNING;
3679 old_down_cnt = sc->down_cnt;
3680 wmb();
3681 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
3682 if (err) {
3683 device_printf(sc->dev, "Couldn't bring down link\n");
3685 if (old_down_cnt == sc->down_cnt) {
3686 /* wait for down irq */
3687 DELAY(10 * sc->intr_coal_delay);
3689 wmb();
3690 if (old_down_cnt == sc->down_cnt) {
3691 device_printf(sc->dev, "never got down irq\n");
3694 mxge_free_mbufs(sc);
3696 return 0;
3699 static void
3700 mxge_setup_cfg_space(mxge_softc_t *sc)
3702 device_t dev = sc->dev;
3703 int reg;
3704 uint16_t cmd, lnk, pectl;
3706 /* find the PCIe link width and set max read request to 4KB */
3707 if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
3708 lnk = pci_read_config(dev, reg + 0x12, 2);
3709 sc->link_width = (lnk >> 4) & 0x3f;
3711 pectl = pci_read_config(dev, reg + 0x8, 2);
3712 pectl = (pectl & ~0x7000) | (5 << 12);
3713 pci_write_config(dev, reg + 0x8, pectl, 2);
3716 /* Enable DMA and Memory space access */
3717 pci_enable_busmaster(dev);
3718 cmd = pci_read_config(dev, PCIR_COMMAND, 2);
3719 cmd |= PCIM_CMD_MEMEN;
3720 pci_write_config(dev, PCIR_COMMAND, cmd, 2);
3723 static uint32_t
3724 mxge_read_reboot(mxge_softc_t *sc)
3726 device_t dev = sc->dev;
3727 uint32_t vs;
3729 /* find the vendor specific offset */
3730 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
3731 device_printf(sc->dev,
3732 "could not find vendor specific offset\n");
3733 return (uint32_t)-1;
3735 /* enable read32 mode */
3736 pci_write_config(dev, vs + 0x10, 0x3, 1);
3737 /* tell NIC which register to read */
3738 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
3739 return (pci_read_config(dev, vs + 0x14, 4));
3742 static int
3743 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
3745 struct pci_devinfo *dinfo;
3746 mxge_tx_ring_t *tx;
3747 int err;
3748 uint32_t reboot;
3749 uint16_t cmd;
3751 err = ENXIO;
3753 device_printf(sc->dev, "Watchdog reset!\n");
3756 * check to see if the NIC rebooted. If it did, then all of
3757 * PCI config space has been reset, and things like the
3758 * busmaster bit will be zero. If this is the case, then we
3759 * must restore PCI config space before the NIC can be used
3760 * again
3762 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3763 if (cmd == 0xffff) {
3765 * maybe the watchdog caught the NIC rebooting; wait
3766 * up to 100ms for it to finish. If it does not come
3767 * back, then give up
3769 DELAY(1000*100);
3770 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
3771 if (cmd == 0xffff) {
3772 device_printf(sc->dev, "NIC disappeared!\n");
3773 return (err);
3776 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
3777 /* print the reboot status */
3778 reboot = mxge_read_reboot(sc);
3779 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
3780 reboot);
3781 /* restore PCI configuration space */
3782 dinfo = device_get_ivars(sc->dev);
3783 pci_cfg_restore(sc->dev, dinfo);
3785 /* and redo any changes we made to our config space */
3786 mxge_setup_cfg_space(sc);
3788 if (sc->ifp->if_flags & IFF_RUNNING) {
3789 mxge_close(sc);
3790 err = mxge_open(sc);
3792 } else {
3793 tx = &sc->ss[slice].tx;
3794 device_printf(sc->dev,
3795 "NIC did not reboot, slice %d ring state:\n",
3796 slice);
3797 device_printf(sc->dev,
3798 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3799 tx->req, tx->done, tx->queue_active);
3800 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
3801 tx->activate, tx->deactivate);
3802 device_printf(sc->dev, "pkt_done=%d fw=%d\n",
3803 tx->pkt_done,
3804 be32toh(sc->ss->fw_stats->send_done_count));
3805 device_printf(sc->dev, "not resetting\n");
3807 return (err);
3810 static int
3811 mxge_watchdog(mxge_softc_t *sc)
3813 mxge_tx_ring_t *tx;
3814 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
3815 int i, err = 0;
3817 /* see if we have outstanding transmits, which
3818 have been pending for more than mxge_ticks */
3819 for (i = 0;
3820 #ifdef IFNET_BUF_RING
3821 (i < sc->num_slices) && (err == 0);
3822 #else
3823 (i < 1) && (err == 0);
3824 #endif
3825 i++) {
3826 tx = &sc->ss[i].tx;
3827 if (tx->req != tx->done &&
3828 tx->watchdog_req != tx->watchdog_done &&
3829 tx->done == tx->watchdog_done) {
3830 /* check for pause blocking before resetting */
3831 if (tx->watchdog_rx_pause == rx_pause)
3832 err = mxge_watchdog_reset(sc, i);
3833 else
3834 device_printf(sc->dev, "Flow control blocking "
3835 "xmits, check link partner\n");
3838 tx->watchdog_req = tx->req;
3839 tx->watchdog_done = tx->done;
3840 tx->watchdog_rx_pause = rx_pause;
3843 if (sc->need_media_probe)
3844 mxge_media_probe(sc);
3845 return (err);
3848 static void
3849 mxge_update_stats(mxge_softc_t *sc)
3851 struct mxge_slice_state *ss;
3852 u_long ipackets = 0;
3853 u_long opackets = 0;
3854 #ifdef IFNET_BUF_RING
3855 u_long obytes = 0;
3856 u_long omcasts = 0;
3857 u_long odrops = 0;
3858 #endif
3859 u_long oerrors = 0;
3860 int slice;
3862 for (slice = 0; slice < sc->num_slices; slice++) {
3863 ss = &sc->ss[slice];
3864 ipackets += ss->ipackets;
3865 opackets += ss->opackets;
3866 #ifdef IFNET_BUF_RING
3867 obytes += ss->obytes;
3868 omcasts += ss->omcasts;
3869 odrops += ss->tx.br->br_drops;
3870 #endif
3871 oerrors += ss->oerrors;
3873 sc->ifp->if_ipackets = ipackets;
3874 sc->ifp->if_opackets = opackets;
3875 #ifdef IFNET_BUF_RING
3876 sc->ifp->if_obytes = obytes;
3877 sc->ifp->if_omcasts = omcasts;
3878 sc->ifp->if_snd.ifq_drops = odrops;
3879 #endif
3880 sc->ifp->if_oerrors = oerrors;
3883 static void
3884 mxge_tick(void *arg)
3886 mxge_softc_t *sc = arg;
3887 int err = 0;
3889 lwkt_serialize_enter(sc->ifp->if_serializer);
3890 /* aggregate stats from different slices */
3891 mxge_update_stats(sc);
3892 if (!sc->watchdog_countdown) {
3893 err = mxge_watchdog(sc);
3894 sc->watchdog_countdown = 4;
3896 sc->watchdog_countdown--;
3897 if (err == 0)
3898 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
3899 lwkt_serialize_exit(sc->ifp->if_serializer);
3902 static int
3903 mxge_media_change(struct ifnet *ifp)
3905 return EINVAL;
3908 static int
3909 mxge_change_mtu(mxge_softc_t *sc, int mtu)
3911 struct ifnet *ifp = sc->ifp;
3912 int real_mtu, old_mtu;
3913 int err = 0;
3915 if (ifp->if_serializer)
3916 ASSERT_SERIALIZED(ifp->if_serializer);
3918 real_mtu = mtu + ETHER_HDR_LEN + EVL_ENCAPLEN;
3919 if ((real_mtu > sc->max_mtu) || real_mtu < 60)
3920 return EINVAL;
3921 old_mtu = ifp->if_mtu;
3922 ifp->if_mtu = mtu;
3923 if (ifp->if_flags & IFF_RUNNING) {
3924 mxge_close(sc);
3925 err = mxge_open(sc);
3926 if (err != 0) {
3927 ifp->if_mtu = old_mtu;
3928 mxge_close(sc);
3929 (void) mxge_open(sc);
3932 return err;
3935 static void
3936 mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3938 mxge_softc_t *sc = ifp->if_softc;
3941 if (sc == NULL)
3942 return;
3943 ifmr->ifm_status = IFM_AVALID;
3944 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
3945 ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
3946 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
3949 static int
3950 mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
3952 mxge_softc_t *sc = ifp->if_softc;
3953 struct ifreq *ifr = (struct ifreq *)data;
3954 int err, mask;
3956 (void)cr;
3957 err = 0;
3958 ASSERT_SERIALIZED(ifp->if_serializer);
3959 switch (command) {
3960 case SIOCSIFADDR:
3961 case SIOCGIFADDR:
3962 err = ether_ioctl(ifp, command, data);
3963 break;
3965 case SIOCSIFMTU:
3966 err = mxge_change_mtu(sc, ifr->ifr_mtu);
3967 break;
3969 case SIOCSIFFLAGS:
3970 if (sc->dying) {
3971 return EINVAL;
3973 if (ifp->if_flags & IFF_UP) {
3974 if (!(ifp->if_flags & IFF_RUNNING)) {
3975 err = mxge_open(sc);
3976 } else {
3977 /* take care of promisc and allmulti
3978 flag changes */
3979 mxge_change_promisc(sc,
3980 ifp->if_flags & IFF_PROMISC);
3981 mxge_set_multicast_list(sc);
3983 } else {
3984 if (ifp->if_flags & IFF_RUNNING) {
3985 mxge_close(sc);
3988 break;
3990 case SIOCADDMULTI:
3991 case SIOCDELMULTI:
3992 mxge_set_multicast_list(sc);
3993 break;
3995 case SIOCSIFCAP:
3996 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3997 if (mask & IFCAP_TXCSUM) {
3998 if (IFCAP_TXCSUM & ifp->if_capenable) {
3999 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
4000 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
4001 | CSUM_TSO);
4002 } else {
4003 ifp->if_capenable |= IFCAP_TXCSUM;
4004 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
4006 } else if (mask & IFCAP_RXCSUM) {
4007 if (IFCAP_RXCSUM & ifp->if_capenable) {
4008 ifp->if_capenable &= ~IFCAP_RXCSUM;
4009 sc->csum_flag = 0;
4010 } else {
4011 ifp->if_capenable |= IFCAP_RXCSUM;
4012 sc->csum_flag = 1;
4015 if (mask & IFCAP_TSO4) {
4016 if (IFCAP_TSO4 & ifp->if_capenable) {
4017 ifp->if_capenable &= ~IFCAP_TSO4;
4018 ifp->if_hwassist &= ~CSUM_TSO;
4019 } else if (IFCAP_TXCSUM & ifp->if_capenable) {
4020 ifp->if_capenable |= IFCAP_TSO4;
4021 ifp->if_hwassist |= CSUM_TSO;
4022 } else {
4023 kprintf("mxge requires tx checksum offload"
4024 " be enabled to use TSO\n");
4025 err = EINVAL;
4028 if (mask & IFCAP_LRO) {
4029 if (IFCAP_LRO & ifp->if_capenable)
4030 err = mxge_change_lro_locked(sc, 0);
4031 else
4032 err = mxge_change_lro_locked(sc, mxge_lro_cnt);
4034 if (mask & IFCAP_VLAN_HWTAGGING)
4035 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
4036 VLAN_CAPABILITIES(ifp);
4038 break;
4040 case SIOCGIFMEDIA:
4041 err = ifmedia_ioctl(ifp, (struct ifreq *)data,
4042 &sc->media, command);
4043 break;
4045 default:
4046 err = ENOTTY;
4048 return err;
4051 static void
4052 mxge_fetch_tunables(mxge_softc_t *sc)
4055 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
4056 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4057 &mxge_flow_control);
4058 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4059 &mxge_intr_coal_delay);
4060 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4061 &mxge_nvidia_ecrc_enable);
4062 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4063 &mxge_force_firmware);
4064 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4065 &mxge_deassert_wait);
4066 TUNABLE_INT_FETCH("hw.mxge.verbose",
4067 &mxge_verbose);
4068 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
4069 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
4070 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
4071 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
4072 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
4073 if (sc->lro_cnt != 0)
4074 mxge_lro_cnt = sc->lro_cnt;
4076 if (bootverbose)
4077 mxge_verbose = 1;
4078 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
4079 mxge_intr_coal_delay = 30;
4080 if (mxge_ticks == 0)
4081 mxge_ticks = hz / 2;
4082 sc->pause = mxge_flow_control;
4083 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
4084 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
4085 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT;
4087 if (mxge_initial_mtu > ETHERMTU_JUMBO ||
4088 mxge_initial_mtu < ETHER_MIN_LEN)
4089 mxge_initial_mtu = ETHERMTU_JUMBO;
4093 static void
4094 mxge_free_slices(mxge_softc_t *sc)
4096 struct mxge_slice_state *ss;
4097 int i;
4100 if (sc->ss == NULL)
4101 return;
4103 for (i = 0; i < sc->num_slices; i++) {
4104 ss = &sc->ss[i];
4105 if (ss->fw_stats != NULL) {
4106 mxge_dma_free(&ss->fw_stats_dma);
4107 ss->fw_stats = NULL;
4108 #ifdef IFNET_BUF_RING
4109 if (ss->tx.br != NULL) {
4110 drbr_free(ss->tx.br, M_DEVBUF);
4111 ss->tx.br = NULL;
4113 #endif
4115 if (ss->rx_done.entry != NULL) {
4116 mxge_dma_free(&ss->rx_done.dma);
4117 ss->rx_done.entry = NULL;
4120 kfree(sc->ss, M_DEVBUF);
4121 sc->ss = NULL;
4124 static int
4125 mxge_alloc_slices(mxge_softc_t *sc)
4127 mxge_cmd_t cmd;
4128 struct mxge_slice_state *ss;
4129 size_t bytes;
4130 int err, i, max_intr_slots;
4132 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4133 if (err != 0) {
4134 device_printf(sc->dev, "Cannot determine rx ring size\n");
4135 return err;
4137 sc->rx_ring_size = cmd.data0;
4138 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));
4140 bytes = sizeof (*sc->ss) * sc->num_slices;
4141 sc->ss = kmalloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
4142 if (sc->ss == NULL)
4143 return (ENOMEM);
4144 for (i = 0; i < sc->num_slices; i++) {
4145 ss = &sc->ss[i];
4147 ss->sc = sc;
4149 /* allocate per-slice rx interrupt queues */
4151 bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
4152 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
4153 if (err != 0)
4154 goto abort;
4155 ss->rx_done.entry = ss->rx_done.dma.addr;
4156 bzero(ss->rx_done.entry, bytes);
4159 * allocate the per-slice firmware stats; stats
4160 * (including tx) are used only on the first
4161 * slice for now
4163 #ifndef IFNET_BUF_RING
4164 if (i > 0)
4165 continue;
4166 #endif
4168 bytes = sizeof (*ss->fw_stats);
4169 err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
4170 sizeof (*ss->fw_stats), 64);
4171 if (err != 0)
4172 goto abort;
4173 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
4174 #ifdef IFNET_BUF_RING
4175 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
4176 &ss->tx.lock);
4177 #endif
4180 return (0);
4182 abort:
4183 mxge_free_slices(sc);
4184 return (ENOMEM);
4187 static void
4188 mxge_slice_probe(mxge_softc_t *sc)
4190 mxge_cmd_t cmd;
4191 char *old_fw;
4192 int msix_cnt, status, max_intr_slots;
4194 sc->num_slices = 1;
4196 * don't enable multiple slices if the tunable disables them,
4197 * or if this is not an SMP system
4200 if (mxge_max_slices == 0 || mxge_max_slices == 1 || ncpus < 2)
4201 return;
4203 /* see how many MSI-X interrupts are available */
4204 msix_cnt = pci_msix_count(sc->dev);
4205 if (msix_cnt < 2)
4206 return;
4208 /* now load the slice aware firmware and see what it supports */
4209 old_fw = sc->fw_name;
4210 if (old_fw == mxge_fw_aligned)
4211 sc->fw_name = mxge_fw_rss_aligned;
4212 else
4213 sc->fw_name = mxge_fw_rss_unaligned;
4214 status = mxge_load_firmware(sc, 0);
4215 if (status != 0) {
4216 device_printf(sc->dev, "Falling back to a single slice\n");
4217 return;
4220 /* try to send a reset command to the card to see if it
4221 is alive */
4222 memset(&cmd, 0, sizeof (cmd));
4223 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
4224 if (status != 0) {
4225 device_printf(sc->dev, "failed reset\n");
4226 goto abort_with_fw;
4229 /* get rx ring size */
4230 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
4231 if (status != 0) {
4232 device_printf(sc->dev, "Cannot determine rx ring size\n");
4233 goto abort_with_fw;
4235 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));
4237 /* tell it the size of the interrupt queues */
4238 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
4239 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
4240 if (status != 0) {
4241 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4242 goto abort_with_fw;
4245 /* ask the maximum number of slices it supports */
4246 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
4247 if (status != 0) {
4248 device_printf(sc->dev,
4249 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4250 goto abort_with_fw;
4252 sc->num_slices = cmd.data0;
4253 if (sc->num_slices > msix_cnt)
4254 sc->num_slices = msix_cnt;
4256 if (mxge_max_slices == -1) {
4257 /* cap to number of CPUs in system */
4258 if (sc->num_slices > ncpus)
4259 sc->num_slices = ncpus;
4260 } else {
4261 if (sc->num_slices > mxge_max_slices)
4262 sc->num_slices = mxge_max_slices;
4264 /* make sure it is a power of two */
4265 while (sc->num_slices & (sc->num_slices - 1))
4266 sc->num_slices--;
4268 if (mxge_verbose)
4269 device_printf(sc->dev, "using %d slices\n",
4270 sc->num_slices);
4272 return;
4274 abort_with_fw:
4275 sc->fw_name = old_fw;
4276 (void) mxge_load_firmware(sc, 0);
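
/*
 * Allocate one MSI-X vector per slice and hook each vector to
 * mxge_intr() under the interface serializer.  The MSI-X table sits
 * behind PCIR_BAR(2), which stays mapped for as long as the vectors
 * are in use.
 */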
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = kmalloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_ih == NULL) {
		/* unwind the IRQ resources if the handler array
		   cannot be allocated */
		err = ENOMEM;
		goto abort_with_res;
	}

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_MPSAFE,
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i],
				     sc->ifp->if_serializer);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			kprintf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		kprintf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}
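
/*
 * Single interrupt fallback: use MSI (rid 1) when exactly one message
 * is available, otherwise share the legacy INTx line (rid 0).
 */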
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_MPSAFE,
			     mxge_intr, &sc->ss[0], &sc->ih,
			     sc->ifp->if_serializer);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}
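
/*
 * Undo mxge_add_msix_irqs(): tear down the handlers, then release the
 * per-slice IRQ resources, the BAR2 table mapping, and the MSI-X
 * vectors themselves.
 */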
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	kfree(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	kfree(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
}
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}
static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}
static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* XXX: MSI-X re-setup path, intentionally disabled */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}
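
/*
 * Device attach: map the board, parse the EEPROM strings, load the
 * firmware, size the slices and allocate the rings, then attach the
 * ifnet.  The interrupt is hooked up last, after ether_ifattach(),
 * since bus_setup_intr() is handed ifp->if_serializer.
 */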
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int err, rid;

	/*
	 * avoid rewriting half the lines in this file to use
	 * &sc->arpcom.ac_if instead
	 */
	sc->ifp = ifp;
	sc->dev = dev;
	mxge_fetch_tunables(sc);

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	callout_init_mp(&sc->co_hdl);

	mxge_setup_cfg_space(sc);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_nothing;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;
	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096);
	if (err != 0)
		goto abort_with_zeropad_dma;

	/* select & load the firmware */
	err = mxge_select_firmware(sc);
	if (err != 0)
		goto abort_with_dmabench;
	sc->intr_coal_delay = mxge_intr_coal_delay;

	mxge_slice_probe(sc);
	err = mxge_alloc_slices(sc);
	if (err != 0)
		goto abort_with_dmabench;

	err = mxge_reset(sc, 0);
	if (err != 0)
		goto abort_with_slices;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		/* unwind the slices allocated above, not just the
		   dmabench region */
		goto abort_with_slices;
	}
	ifp->if_baudrate = IF_Gbps(10UL);
	ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
		IFCAP_VLAN_MTU;
#ifdef INET
	ifp->if_capabilities |= IFCAP_LRO;
#endif

#ifdef MXGE_NEW_VLAN_API
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
#endif

	sc->max_mtu = mxge_max_mtu(sc);
	if (sc->max_mtu >= 9000)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	else
		device_printf(dev, "MTU limited to %d. Install "
			      "latest firmware for 9000 byte jumbo support\n",
			      sc->max_mtu - ETHER_HDR_LEN);
	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
	ifp->if_capenable = ifp->if_capabilities;
	if (sc->lro_cnt == 0)
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->csum_flag = 1;
	ifp->if_init = mxge_init;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = mxge_ioctl;
	ifp->if_start = mxge_start;
	/* Initialise the ifmedia structure */
	ifmedia_init(&sc->media, 0, mxge_media_change,
		     mxge_media_status);
	mxge_set_media(sc, IFM_ETHER | IFM_AUTO);
	mxge_media_probe(sc);
	sc->dying = 0;
	ether_ifattach(ifp, sc->mac_addr, NULL);
	/* ether_ifattach sets mtu to ETHERMTU */
	if (mxge_initial_mtu != ETHERMTU) {
		lwkt_serialize_enter(ifp->if_serializer);
		mxge_change_mtu(sc, mxge_initial_mtu);
		lwkt_serialize_exit(ifp->if_serializer);
	}
	/* must come after ether_ifattach(), which sets up the
	   ifp->if_serializer passed to bus_setup_intr() */
	err = mxge_add_irq(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to add irq\n");
		goto abort_with_rings;
	}

	mxge_add_sysctls(sc);
#ifdef IFNET_BUF_RING
	ifp->if_transmit = mxge_transmit;
	ifp->if_qflush = mxge_qflush;
#endif
	return 0;
abort_with_rings:
	mxge_free_rings(sc);
abort_with_slices:
	mxge_free_slices(sc);
abort_with_dmabench:
	mxge_dma_free(&sc->dmabench_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	bus_dma_tag_destroy(sc->parent_dmat);
abort_with_nothing:
	return err;
}
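
/*
 * Device detach: mark the driver dying and close the interface under
 * the serializer, then release everything in roughly the reverse
 * order of mxge_attach().
 */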
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	lwkt_serialize_enter(sc->ifp->if_serializer);
	sc->dying = 1;
	if (sc->ifp->if_flags & IFF_RUNNING)
		mxge_close(sc);
	/*
	 * XXX: race: the callout callback could be spinning on
	 * the serializer and run anyway
	 */
	callout_stop(&sc->co_hdl);
	lwkt_serialize_exit(sc->ifp->if_serializer);

	ether_ifdetach(sc->ifp);
	ifmedia_removeall(&sc->media);
	mxge_dummy_rdma(sc, 0);
	mxge_rem_sysctls(sc);
	mxge_rem_irq(sc);
	mxge_free_rings(sc);
	mxge_free_slices(sc);
	mxge_dma_free(&sc->dmabench_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}
static int
mxge_shutdown(device_t dev)
{
	return 0;
}
/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/