1 /******************************************************************************
3 Copyright (c) 2006-2009, Myricom Inc.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Neither the name of the Myricom Inc, nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
28 ***************************************************************************/
30 #include <sys/cdefs.h>
31 /*__FBSDID("$FreeBSD: src/sys/dev/mxge/if_mxge.c,v 1.63 2009/06/26 11:45:06 rwatson Exp $");*/
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/linker.h>
36 #include <sys/firmware.h>
37 #include <sys/endian.h>
38 #include <sys/sockio.h>
40 #include <sys/malloc.h>
41 #include <sys/kernel.h>
43 #include <sys/module.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
47 /* count xmits ourselves, rather than via drbr */
50 #include <net/if_arp.h>
51 #include <net/ethernet.h>
52 #include <net/if_dl.h>
53 #include <net/if_media.h>
57 #include <net/if_types.h>
58 #include <net/vlan/if_vlan_var.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/tcp.h>
66 #include <machine/resource.h>
70 #include <bus/pci/pcireg.h>
71 #include <bus/pci/pcivar.h>
72 #include <bus/pci/pci_private.h> /* XXX for pci_cfg_restore */
74 #include <vm/vm.h> /* for pmap_mapdev() */
77 #if defined(__i386) || defined(__amd64)
78 #include <machine/specialreg.h>
81 #include <dev/netif/mxge/mxge_mcp.h>
82 #include <dev/netif/mxge/mcp_gen_header.h>
83 /*#define MXGE_FAKE_IFP*/
84 #include <dev/netif/mxge/if_mxge_var.h>
86 #include <sys/buf_ring.h>
92 static int mxge_nvidia_ecrc_enable
= 1;
93 static int mxge_force_firmware
= 0;
94 static int mxge_intr_coal_delay
= 30;
95 static int mxge_deassert_wait
= 1;
96 static int mxge_flow_control
= 1;
97 static int mxge_verbose
= 0;
98 static int mxge_lro_cnt
= 8;
99 static int mxge_ticks
;
100 static int mxge_max_slices
= 1;
101 static int mxge_rss_hash_type
= MXGEFW_RSS_HASH_TYPE_SRC_PORT
;
102 static int mxge_always_promisc
= 0;
103 static int mxge_initial_mtu
= ETHERMTU_JUMBO
;
104 static char *mxge_fw_unaligned
= "mxge_ethp_z8e";
105 static char *mxge_fw_aligned
= "mxge_eth_z8e";
106 static char *mxge_fw_rss_aligned
= "mxge_rss_eth_z8e";
107 static char *mxge_fw_rss_unaligned
= "mxge_rss_ethp_z8e";
109 static int mxge_probe(device_t dev
);
110 static int mxge_attach(device_t dev
);
111 static int mxge_detach(device_t dev
);
112 static int mxge_shutdown(device_t dev
);
113 static void mxge_intr(void *arg
);
115 static device_method_t mxge_methods
[] =
117 /* Device interface */
118 DEVMETHOD(device_probe
, mxge_probe
),
119 DEVMETHOD(device_attach
, mxge_attach
),
120 DEVMETHOD(device_detach
, mxge_detach
),
121 DEVMETHOD(device_shutdown
, mxge_shutdown
),
125 static driver_t mxge_driver
=
129 sizeof(mxge_softc_t
),
132 static devclass_t mxge_devclass
;
134 /* Declare ourselves to be a child of the PCI bus.*/
135 DRIVER_MODULE(mxge
, pci
, mxge_driver
, mxge_devclass
, 0, 0);
136 MODULE_DEPEND(mxge
, firmware
, 1, 1, 1);
137 MODULE_DEPEND(mxge
, zlib
, 1, 1, 1);
139 static int mxge_load_firmware(mxge_softc_t
*sc
, int adopt
);
140 static int mxge_send_cmd(mxge_softc_t
*sc
, uint32_t cmd
, mxge_cmd_t
*data
);
141 static int mxge_close(mxge_softc_t
*sc
);
142 static int mxge_open(mxge_softc_t
*sc
);
143 static void mxge_tick(void *arg
);
146 mxge_probe(device_t dev
)
151 if ((pci_get_vendor(dev
) == MXGE_PCI_VENDOR_MYRICOM
) &&
152 ((pci_get_device(dev
) == MXGE_PCI_DEVICE_Z8E
) ||
153 (pci_get_device(dev
) == MXGE_PCI_DEVICE_Z8E_9
))) {
154 rev
= pci_get_revid(dev
);
156 case MXGE_PCI_REV_Z8E
:
157 device_set_desc(dev
, "Myri10G-PCIE-8A");
159 case MXGE_PCI_REV_Z8ES
:
160 device_set_desc(dev
, "Myri10G-PCIE-8B");
163 device_set_desc(dev
, "Myri10G-PCIE-8??");
164 device_printf(dev
, "Unrecognized rev %d NIC\n",
174 mxge_enable_wc(mxge_softc_t
*sc
)
176 #if defined(__i386) || defined(__amd64)
181 len
= rman_get_size(sc
->mem_res
);
182 err
= pmap_change_attr((vm_offset_t
) sc
->sram
,
183 len
, PAT_WRITE_COMBINING
);
185 device_printf(sc
->dev
, "pmap_change_attr failed, %d\n",
193 /* callback to get our DMA address */
195 mxge_dmamap_callback(void *arg
, bus_dma_segment_t
*segs
, int nsegs
,
199 *(bus_addr_t
*) arg
= segs
->ds_addr
;
204 mxge_dma_alloc(mxge_softc_t
*sc
, mxge_dma_t
*dma
, size_t bytes
,
205 bus_size_t alignment
)
208 device_t dev
= sc
->dev
;
209 bus_size_t boundary
, maxsegsize
;
211 if (bytes
> 4096 && alignment
== 4096) {
219 /* allocate DMAable memory tags */
220 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
221 alignment
, /* alignment */
222 boundary
, /* boundary */
223 BUS_SPACE_MAXADDR
, /* low */
224 BUS_SPACE_MAXADDR
, /* high */
225 NULL
, NULL
, /* filter */
228 maxsegsize
, /* maxsegsize */
229 BUS_DMA_COHERENT
, /* flags */
230 NULL
, NULL
, /* lock */
231 &dma
->dmat
); /* tag */
233 device_printf(dev
, "couldn't alloc tag (err = %d)\n", err
);
237 /* allocate DMAable memory & map */
238 err
= bus_dmamem_alloc(dma
->dmat
, &dma
->addr
,
239 (BUS_DMA_WAITOK
| BUS_DMA_COHERENT
240 | BUS_DMA_ZERO
), &dma
->map
);
242 device_printf(dev
, "couldn't alloc mem (err = %d)\n", err
);
243 goto abort_with_dmat
;
246 /* load the memory */
247 err
= bus_dmamap_load(dma
->dmat
, dma
->map
, dma
->addr
, bytes
,
248 mxge_dmamap_callback
,
249 (void *)&dma
->bus_addr
, 0);
251 device_printf(dev
, "couldn't load map (err = %d)\n", err
);
257 bus_dmamem_free(dma
->dmat
, dma
->addr
, dma
->map
);
259 (void)bus_dma_tag_destroy(dma
->dmat
);
265 mxge_dma_free(mxge_dma_t
*dma
)
267 bus_dmamap_unload(dma
->dmat
, dma
->map
);
268 bus_dmamem_free(dma
->dmat
, dma
->addr
, dma
->map
);
269 (void)bus_dma_tag_destroy(dma
->dmat
);
273 * The eeprom strings on the lanaiX have the format
280 mxge_parse_strings(mxge_softc_t
*sc
)
282 #define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)
287 ptr
= sc
->eeprom_strings
;
288 limit
= sc
->eeprom_strings
+ MXGE_EEPROM_STRINGS_SIZE
;
290 while (ptr
< limit
&& *ptr
!= '\0') {
291 if (memcmp(ptr
, "MAC=", 4) == 0) {
293 sc
->mac_addr_string
= ptr
;
294 for (i
= 0; i
< 6; i
++) {
296 if ((ptr
+ 2) > limit
)
298 sc
->mac_addr
[i
] = strtoul(ptr
, NULL
, 16);
301 } else if (memcmp(ptr
, "PC=", 3) == 0) {
303 strncpy(sc
->product_code_string
, ptr
,
304 sizeof (sc
->product_code_string
) - 1);
305 } else if (memcmp(ptr
, "SN=", 3) == 0) {
307 strncpy(sc
->serial_number_string
, ptr
,
308 sizeof (sc
->serial_number_string
) - 1);
310 MXGE_NEXT_STRING(ptr
);
317 device_printf(sc
->dev
, "failed to parse eeprom_strings\n");
322 #if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
324 mxge_enable_nvidia_ecrc(mxge_softc_t
*sc
)
327 unsigned long base
, off
;
329 device_t pdev
, mcp55
;
330 uint16_t vendor_id
, device_id
, word
;
331 uintptr_t bus
, slot
, func
, ivend
, idev
;
335 if (!mxge_nvidia_ecrc_enable
)
338 pdev
= device_get_parent(device_get_parent(sc
->dev
));
340 device_printf(sc
->dev
, "could not find parent?\n");
343 vendor_id
= pci_read_config(pdev
, PCIR_VENDOR
, 2);
344 device_id
= pci_read_config(pdev
, PCIR_DEVICE
, 2);
346 if (vendor_id
!= 0x10de)
351 if (device_id
== 0x005d) {
352 /* ck804, base address is magic */
354 } else if (device_id
>= 0x0374 && device_id
<= 0x378) {
355 /* mcp55, base address stored in chipset */
356 mcp55
= pci_find_bsf(0, 0, 0);
358 0x10de == pci_read_config(mcp55
, PCIR_VENDOR
, 2) &&
359 0x0369 == pci_read_config(mcp55
, PCIR_DEVICE
, 2)) {
360 word
= pci_read_config(mcp55
, 0x90, 2);
361 base
= ((unsigned long)word
& 0x7ffeU
) << 25;
368 Test below is commented because it is believed that doing
369 config read/write beyond 0xff will access the config space
370 for the next larger function. Uncomment this and remove
371 the hacky pmap_mapdev() way of accessing config space when
372 FreeBSD grows support for extended pcie config space access
375 /* See if we can, by some miracle, access the extended
377 val
= pci_read_config(pdev
, 0x178, 4);
378 if (val
!= 0xffffffff) {
380 pci_write_config(pdev
, 0x178, val
, 4);
384 /* Rather than using normal pci config space writes, we must
385 * map the Nvidia config space ourselves. This is because on
386 * opteron/nvidia class machine the 0xe000000 mapping is
387 * handled by the nvidia chipset, that means the internal PCI
388 * device (the on-chip northbridge), or the amd-8131 bridge
389 * and things behind them are not visible by this method.
392 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
394 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
395 PCI_IVAR_SLOT
, &slot
);
396 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
397 PCI_IVAR_FUNCTION
, &func
);
398 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
399 PCI_IVAR_VENDOR
, &ivend
);
400 BUS_READ_IVAR(device_get_parent(pdev
), pdev
,
401 PCI_IVAR_DEVICE
, &idev
);
404 + 0x00100000UL
* (unsigned long)bus
405 + 0x00001000UL
* (unsigned long)(func
408 /* map it into the kernel */
409 va
= pmap_mapdev(trunc_page((vm_paddr_t
)off
), PAGE_SIZE
);
413 device_printf(sc
->dev
, "pmap_kenter_temporary didn't\n");
416 /* get a pointer to the config space mapped into the kernel */
417 cfgptr
= va
+ (off
& PAGE_MASK
);
419 /* make sure that we can really access it */
420 vendor_id
= *(uint16_t *)(cfgptr
+ PCIR_VENDOR
);
421 device_id
= *(uint16_t *)(cfgptr
+ PCIR_DEVICE
);
422 if (! (vendor_id
== ivend
&& device_id
== idev
)) {
423 device_printf(sc
->dev
, "mapping failed: 0x%x:0x%x\n",
424 vendor_id
, device_id
);
425 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
429 ptr32
= (uint32_t*)(cfgptr
+ 0x178);
432 if (val
== 0xffffffff) {
433 device_printf(sc
->dev
, "extended mapping failed\n");
434 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
438 pmap_unmapdev((vm_offset_t
)va
, PAGE_SIZE
);
440 device_printf(sc
->dev
,
441 "Enabled ECRC on upstream Nvidia bridge "
443 (int)bus
, (int)slot
, (int)func
);
448 mxge_enable_nvidia_ecrc(mxge_softc_t
*sc
)
450 device_printf(sc
->dev
,
451 "Nforce 4 chipset on non-x86/amd64!?!?!\n");
458 mxge_dma_test(mxge_softc_t
*sc
, int test_type
)
461 bus_addr_t dmatest_bus
= sc
->dmabench_dma
.bus_addr
;
467 /* Run a small DMA test.
468 * The magic multipliers to the length tell the firmware
469 * to do DMA read, write, or read+write tests. The
470 * results are returned in cmd.data0. The upper 16
471 * bits of the return is the number of transfers completed.
472 * The lower 16 bits is the time in 0.5us ticks that the
473 * transfers took to complete.
476 len
= sc
->tx_boundary
;
478 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
479 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
480 cmd
.data2
= len
* 0x10000;
481 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
486 sc
->read_dma
= ((cmd
.data0
>>16) * len
* 2) /
487 (cmd
.data0
& 0xffff);
488 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
489 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
490 cmd
.data2
= len
* 0x1;
491 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
496 sc
->write_dma
= ((cmd
.data0
>>16) * len
* 2) /
497 (cmd
.data0
& 0xffff);
499 cmd
.data0
= MXGE_LOWPART_TO_U32(dmatest_bus
);
500 cmd
.data1
= MXGE_HIGHPART_TO_U32(dmatest_bus
);
501 cmd
.data2
= len
* 0x10001;
502 status
= mxge_send_cmd(sc
, test_type
, &cmd
);
507 sc
->read_write_dma
= ((cmd
.data0
>>16) * len
* 2 * 2) /
508 (cmd
.data0
& 0xffff);
511 if (status
!= 0 && test_type
!= MXGEFW_CMD_UNALIGNED_TEST
)
512 device_printf(sc
->dev
, "DMA %s benchmark failed: %d\n",
519 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
520 * when the PCI-E Completion packets are aligned on an 8-byte
521 * boundary. Some PCI-E chip sets always align Completion packets; on
522 * the ones that do not, the alignment can be enforced by enabling
523 * ECRC generation (if supported).
525 * When PCI-E Completion packets are not aligned, it is actually more
526 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
528 * If the driver can neither enable ECRC nor verify that it has
529 * already been enabled, then it must use a firmware image which works
530 * around unaligned completion packets (ethp_z8e.dat), and it should
531 * also ensure that it never gives the device a Read-DMA which is
532 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is
533 * enabled, then the driver should use the aligned (eth_z8e.dat)
534 * firmware image, and set tx_boundary to 4KB.
538 mxge_firmware_probe(mxge_softc_t
*sc
)
540 device_t dev
= sc
->dev
;
544 sc
->tx_boundary
= 4096;
546 * Verify the max read request size was set to 4KB
547 * before trying the test with 4KB.
549 if (pci_find_extcap(dev
, PCIY_EXPRESS
, ®
) == 0) {
550 pectl
= pci_read_config(dev
, reg
+ 0x8, 2);
551 if ((pectl
& (5 << 12)) != (5 << 12)) {
552 device_printf(dev
, "Max Read Req. size != 4k (0x%x\n",
554 sc
->tx_boundary
= 2048;
559 * load the optimized firmware (which assumes aligned PCIe
560 * completions) in order to see if it works on this host.
562 sc
->fw_name
= mxge_fw_aligned
;
563 status
= mxge_load_firmware(sc
, 1);
569 * Enable ECRC if possible
571 mxge_enable_nvidia_ecrc(sc
);
574 * Run a DMA test which watches for unaligned completions and
575 * aborts on the first one seen.
578 status
= mxge_dma_test(sc
, MXGEFW_CMD_UNALIGNED_TEST
);
580 return 0; /* keep the aligned firmware */
583 device_printf(dev
, "DMA test failed: %d\n", status
);
584 if (status
== ENOSYS
)
585 device_printf(dev
, "Falling back to ethp! "
586 "Please install up to date fw\n");
591 mxge_select_firmware(mxge_softc_t
*sc
)
596 if (mxge_force_firmware
!= 0) {
597 if (mxge_force_firmware
== 1)
602 device_printf(sc
->dev
,
603 "Assuming %s completions (forced)\n",
604 aligned
? "aligned" : "unaligned");
608 /* if the PCIe link width is 4 or less, we can use the aligned
609 firmware and skip any checks */
610 if (sc
->link_width
!= 0 && sc
->link_width
<= 4) {
611 device_printf(sc
->dev
,
612 "PCIe x%d Link, expect reduced performance\n",
618 if (0 == mxge_firmware_probe(sc
))
623 sc
->fw_name
= mxge_fw_aligned
;
624 sc
->tx_boundary
= 4096;
626 sc
->fw_name
= mxge_fw_unaligned
;
627 sc
->tx_boundary
= 2048;
629 return (mxge_load_firmware(sc
, 0));
639 mxge_validate_firmware(mxge_softc_t
*sc
, const mcp_gen_header_t
*hdr
)
643 if (be32toh(hdr
->mcp_type
) != MCP_TYPE_ETH
) {
644 device_printf(sc
->dev
, "Bad firmware type: 0x%x\n",
645 be32toh(hdr
->mcp_type
));
649 /* save firmware version for sysctl */
650 strncpy(sc
->fw_version
, hdr
->version
, sizeof (sc
->fw_version
));
652 device_printf(sc
->dev
, "firmware id: %s\n", hdr
->version
);
654 ksscanf(sc
->fw_version
, "%d.%d.%d", &sc
->fw_ver_major
,
655 &sc
->fw_ver_minor
, &sc
->fw_ver_tiny
);
657 if (!(sc
->fw_ver_major
== MXGEFW_VERSION_MAJOR
658 && sc
->fw_ver_minor
== MXGEFW_VERSION_MINOR
)) {
659 device_printf(sc
->dev
, "Found firmware version %s\n",
661 device_printf(sc
->dev
, "Driver needs %d.%d\n",
662 MXGEFW_VERSION_MAJOR
, MXGEFW_VERSION_MINOR
);
670 z_alloc(void *nil
, u_int items
, u_int size
)
674 ptr
= kmalloc(items
* size
, M_TEMP
, M_NOWAIT
);
679 z_free(void *nil
, void *ptr
)
686 mxge_load_firmware_helper(mxge_softc_t
*sc
, uint32_t *limit
)
689 char *inflate_buffer
;
690 const struct firmware
*fw
;
691 const mcp_gen_header_t
*hdr
;
698 fw
= firmware_get(sc
->fw_name
);
700 device_printf(sc
->dev
, "Could not find firmware image %s\n",
707 /* setup zlib and decompress f/w */
708 bzero(&zs
, sizeof (zs
));
711 status
= inflateInit(&zs
);
712 if (status
!= Z_OK
) {
717 /* the uncompressed size is stored as the firmware version,
718 which would otherwise go unused */
719 fw_len
= (size_t) fw
->version
;
720 inflate_buffer
= kmalloc(fw_len
, M_TEMP
, M_NOWAIT
);
721 if (inflate_buffer
== NULL
)
723 zs
.avail_in
= fw
->datasize
;
724 zs
.next_in
= __DECONST(char *, fw
->data
);
725 zs
.avail_out
= fw_len
;
726 zs
.next_out
= inflate_buffer
;
727 status
= inflate(&zs
, Z_FINISH
);
728 if (status
!= Z_STREAM_END
) {
729 device_printf(sc
->dev
, "zlib %d\n", status
);
731 goto abort_with_buffer
;
735 hdr_offset
= htobe32(*(const uint32_t *)
736 (inflate_buffer
+ MCP_HEADER_PTR_OFFSET
));
737 if ((hdr_offset
& 3) || hdr_offset
+ sizeof(*hdr
) > fw_len
) {
738 device_printf(sc
->dev
, "Bad firmware file");
740 goto abort_with_buffer
;
742 hdr
= (const void*)(inflate_buffer
+ hdr_offset
);
744 status
= mxge_validate_firmware(sc
, hdr
);
746 goto abort_with_buffer
;
748 /* Copy the inflated firmware to NIC SRAM. */
749 for (i
= 0; i
< fw_len
; i
+= 256) {
750 mxge_pio_copy(sc
->sram
+ MXGE_FW_OFFSET
+ i
,
752 min(256U, (unsigned)(fw_len
- i
)));
761 kfree(inflate_buffer
, M_TEMP
);
765 firmware_put(fw
, FIRMWARE_UNLOAD
);
770 * Enable or disable periodic RDMAs from the host to make certain
771 * chipsets resend dropped PCIe messages
775 mxge_dummy_rdma(mxge_softc_t
*sc
, int enable
)
778 volatile uint32_t *confirm
;
779 volatile char *submit
;
780 uint32_t *buf
, dma_low
, dma_high
;
783 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
785 /* clear confirmation addr */
786 confirm
= (volatile uint32_t *)sc
->cmd
;
790 /* send an rdma command to the PCIe engine, and wait for the
791 response in the confirmation address. The firmware should
792 write a -1 there to indicate it is alive and well
795 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.bus_addr
);
796 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.bus_addr
);
797 buf
[0] = htobe32(dma_high
); /* confirm addr MSW */
798 buf
[1] = htobe32(dma_low
); /* confirm addr LSW */
799 buf
[2] = htobe32(0xffffffff); /* confirm data */
800 dma_low
= MXGE_LOWPART_TO_U32(sc
->zeropad_dma
.bus_addr
);
801 dma_high
= MXGE_HIGHPART_TO_U32(sc
->zeropad_dma
.bus_addr
);
802 buf
[3] = htobe32(dma_high
); /* dummy addr MSW */
803 buf
[4] = htobe32(dma_low
); /* dummy addr LSW */
804 buf
[5] = htobe32(enable
); /* enable? */
807 submit
= (volatile char *)(sc
->sram
+ MXGEFW_BOOT_DUMMY_RDMA
);
809 mxge_pio_copy(submit
, buf
, 64);
814 while (*confirm
!= 0xffffffff && i
< 20) {
818 if (*confirm
!= 0xffffffff) {
819 device_printf(sc
->dev
, "dummy rdma %s failed (%p = 0x%x)",
820 (enable
? "enable" : "disable"), confirm
,
827 mxge_send_cmd(mxge_softc_t
*sc
, uint32_t cmd
, mxge_cmd_t
*data
)
830 char buf_bytes
[sizeof(*buf
) + 8];
831 volatile mcp_cmd_response_t
*response
= sc
->cmd
;
832 volatile char *cmd_addr
= sc
->sram
+ MXGEFW_ETH_CMD
;
833 uint32_t dma_low
, dma_high
;
834 int err
, sleep_total
= 0;
836 /* ensure buf is aligned to 8 bytes */
837 buf
= (mcp_cmd_t
*)((unsigned long)(buf_bytes
+ 7) & ~7UL);
839 buf
->data0
= htobe32(data
->data0
);
840 buf
->data1
= htobe32(data
->data1
);
841 buf
->data2
= htobe32(data
->data2
);
842 buf
->cmd
= htobe32(cmd
);
843 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.bus_addr
);
844 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.bus_addr
);
846 buf
->response_addr
.low
= htobe32(dma_low
);
847 buf
->response_addr
.high
= htobe32(dma_high
);
848 lockmgr(&sc
->cmd_lock
, LK_EXCLUSIVE
);
849 response
->result
= 0xffffffff;
851 mxge_pio_copy((volatile void *)cmd_addr
, buf
, sizeof (*buf
));
853 /* wait up to 20ms */
855 for (sleep_total
= 0; sleep_total
< 20; sleep_total
++) {
856 bus_dmamap_sync(sc
->cmd_dma
.dmat
,
857 sc
->cmd_dma
.map
, BUS_DMASYNC_POSTREAD
);
859 switch (be32toh(response
->result
)) {
861 data
->data0
= be32toh(response
->data
);
867 case MXGEFW_CMD_UNKNOWN
:
870 case MXGEFW_CMD_ERROR_UNALIGNED
:
873 case MXGEFW_CMD_ERROR_BUSY
:
877 device_printf(sc
->dev
,
879 "failed, result = %d\n",
880 cmd
, be32toh(response
->result
));
888 device_printf(sc
->dev
, "mxge: command %d timed out"
890 cmd
, be32toh(response
->result
));
891 lockmgr(&sc
->cmd_lock
, LK_RELEASE
);
896 mxge_adopt_running_firmware(mxge_softc_t
*sc
)
898 struct mcp_gen_header
*hdr
;
899 const size_t bytes
= sizeof (struct mcp_gen_header
);
903 /* find running firmware header */
904 hdr_offset
= htobe32(*(volatile uint32_t *)
905 (sc
->sram
+ MCP_HEADER_PTR_OFFSET
));
907 if ((hdr_offset
& 3) || hdr_offset
+ sizeof(*hdr
) > sc
->sram_size
) {
908 device_printf(sc
->dev
,
909 "Running firmware has bad header offset (%d)\n",
914 /* copy header of running firmware from SRAM to host memory to
915 * validate firmware */
916 hdr
= kmalloc(bytes
, M_DEVBUF
, M_NOWAIT
);
918 device_printf(sc
->dev
, "could not kmalloc firmware hdr\n");
921 bus_space_read_region_1(rman_get_bustag(sc
->mem_res
),
922 rman_get_bushandle(sc
->mem_res
),
923 hdr_offset
, (char *)hdr
, bytes
);
924 status
= mxge_validate_firmware(sc
, hdr
);
925 kfree(hdr
, M_DEVBUF
);
928 * check to see if adopted firmware has bug where adopting
929 * it will cause broadcasts to be filtered unless the NIC
930 * is kept in ALLMULTI mode
932 if (sc
->fw_ver_major
== 1 && sc
->fw_ver_minor
== 4 &&
933 sc
->fw_ver_tiny
>= 4 && sc
->fw_ver_tiny
<= 11) {
934 sc
->adopted_rx_filter_bug
= 1;
935 device_printf(sc
->dev
, "Adopting fw %d.%d.%d: "
936 "working around rx filter bug\n",
937 sc
->fw_ver_major
, sc
->fw_ver_minor
,
946 mxge_load_firmware(mxge_softc_t
*sc
, int adopt
)
948 volatile uint32_t *confirm
;
949 volatile char *submit
;
951 uint32_t *buf
, size
, dma_low
, dma_high
;
954 buf
= (uint32_t *)((unsigned long)(buf_bytes
+ 7) & ~7UL);
956 size
= sc
->sram_size
;
957 status
= mxge_load_firmware_helper(sc
, &size
);
961 /* Try to use the currently running firmware, if
963 status
= mxge_adopt_running_firmware(sc
);
965 device_printf(sc
->dev
,
966 "failed to adopt running firmware\n");
969 device_printf(sc
->dev
,
970 "Successfully adopted running firmware\n");
971 if (sc
->tx_boundary
== 4096) {
972 device_printf(sc
->dev
,
973 "Using firmware currently running on NIC"
975 device_printf(sc
->dev
,
976 "performance consider loading optimized "
979 sc
->fw_name
= mxge_fw_unaligned
;
980 sc
->tx_boundary
= 2048;
983 /* clear confirmation addr */
984 confirm
= (volatile uint32_t *)sc
->cmd
;
987 /* send a reload command to the bootstrap MCP, and wait for the
988 response in the confirmation address. The firmware should
989 write a -1 there to indicate it is alive and well
992 dma_low
= MXGE_LOWPART_TO_U32(sc
->cmd_dma
.bus_addr
);
993 dma_high
= MXGE_HIGHPART_TO_U32(sc
->cmd_dma
.bus_addr
);
995 buf
[0] = htobe32(dma_high
); /* confirm addr MSW */
996 buf
[1] = htobe32(dma_low
); /* confirm addr LSW */
997 buf
[2] = htobe32(0xffffffff); /* confirm data */
999 /* FIX: All newest firmware should un-protect the bottom of
1000 the sram before handoff. However, the very first interfaces
1001 do not. Therefore the handoff copy must skip the first 8 bytes
1003 /* where the code starts*/
1004 buf
[3] = htobe32(MXGE_FW_OFFSET
+ 8);
1005 buf
[4] = htobe32(size
- 8); /* length of code */
1006 buf
[5] = htobe32(8); /* where to copy to */
1007 buf
[6] = htobe32(0); /* where to jump to */
1009 submit
= (volatile char *)(sc
->sram
+ MXGEFW_BOOT_HANDOFF
);
1010 mxge_pio_copy(submit
, buf
, 64);
1015 while (*confirm
!= 0xffffffff && i
< 20) {
1018 bus_dmamap_sync(sc
->cmd_dma
.dmat
,
1019 sc
->cmd_dma
.map
, BUS_DMASYNC_POSTREAD
);
1021 if (*confirm
!= 0xffffffff) {
1022 device_printf(sc
->dev
,"handoff failed (%p = 0x%x)",
1031 mxge_update_mac_address(mxge_softc_t
*sc
)
1034 uint8_t *addr
= sc
->mac_addr
;
1038 cmd
.data0
= ((addr
[0] << 24) | (addr
[1] << 16)
1039 | (addr
[2] << 8) | addr
[3]);
1041 cmd
.data1
= ((addr
[4] << 8) | (addr
[5]));
1043 status
= mxge_send_cmd(sc
, MXGEFW_SET_MAC_ADDRESS
, &cmd
);
1048 mxge_change_pause(mxge_softc_t
*sc
, int pause
)
1054 status
= mxge_send_cmd(sc
, MXGEFW_ENABLE_FLOW_CONTROL
,
1057 status
= mxge_send_cmd(sc
, MXGEFW_DISABLE_FLOW_CONTROL
,
1061 device_printf(sc
->dev
, "Failed to set flow control mode\n");
1069 mxge_change_promisc(mxge_softc_t
*sc
, int promisc
)
1074 if (mxge_always_promisc
)
1078 status
= mxge_send_cmd(sc
, MXGEFW_ENABLE_PROMISC
,
1081 status
= mxge_send_cmd(sc
, MXGEFW_DISABLE_PROMISC
,
1085 device_printf(sc
->dev
, "Failed to set promisc mode\n");
1090 mxge_set_multicast_list(mxge_softc_t
*sc
)
1093 struct ifmultiaddr
*ifma
;
1094 struct ifnet
*ifp
= sc
->ifp
;
1097 /* This firmware is known to not support multicast */
1098 if (!sc
->fw_multicast_support
)
1101 /* Disable multicast filtering while we play with the lists*/
1102 err
= mxge_send_cmd(sc
, MXGEFW_ENABLE_ALLMULTI
, &cmd
);
1104 device_printf(sc
->dev
, "Failed MXGEFW_ENABLE_ALLMULTI,"
1105 " error status: %d\n", err
);
1109 if (sc
->adopted_rx_filter_bug
)
1112 if (ifp
->if_flags
& IFF_ALLMULTI
)
1113 /* request to disable multicast filtering, so quit here */
1116 /* Flush all the filters */
1118 err
= mxge_send_cmd(sc
, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS
, &cmd
);
1120 device_printf(sc
->dev
,
1121 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
1122 ", error status: %d\n", err
);
1126 /* Walk the multicast list, and add each address */
1128 if_maddr_rlock(ifp
);
1129 TAILQ_FOREACH(ifma
, &ifp
->if_multiaddrs
, ifma_link
) {
1130 if (ifma
->ifma_addr
->sa_family
!= AF_LINK
)
1132 bcopy(LLADDR((struct sockaddr_dl
*)ifma
->ifma_addr
),
1134 bcopy(LLADDR((struct sockaddr_dl
*)ifma
->ifma_addr
) + 4,
1136 cmd
.data0
= htonl(cmd
.data0
);
1137 cmd
.data1
= htonl(cmd
.data1
);
1138 err
= mxge_send_cmd(sc
, MXGEFW_JOIN_MULTICAST_GROUP
, &cmd
);
1140 device_printf(sc
->dev
, "Failed "
1141 "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
1143 /* abort, leaving multicast filtering off */
1144 if_maddr_runlock(ifp
);
1148 if_maddr_runlock(ifp
);
1149 /* Enable multicast filtering */
1150 err
= mxge_send_cmd(sc
, MXGEFW_DISABLE_ALLMULTI
, &cmd
);
1152 device_printf(sc
->dev
, "Failed MXGEFW_DISABLE_ALLMULTI"
1153 ", error status: %d\n", err
);
1158 mxge_max_mtu(mxge_softc_t
*sc
)
1163 if (MJUMPAGESIZE
- MXGEFW_PAD
> MXGEFW_MAX_MTU
)
1164 return MXGEFW_MAX_MTU
- MXGEFW_PAD
;
1166 /* try to set nbufs to see if it we can
1167 use virtually contiguous jumbos */
1169 status
= mxge_send_cmd(sc
, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS
,
1172 return MXGEFW_MAX_MTU
- MXGEFW_PAD
;
1174 /* otherwise, we're limited to MJUMPAGESIZE */
1175 return MJUMPAGESIZE
- MXGEFW_PAD
;
1179 mxge_reset(mxge_softc_t
*sc
, int interrupts_setup
)
1181 struct mxge_slice_state
*ss
;
1182 mxge_rx_done_t
*rx_done
;
1183 volatile uint32_t *irq_claim
;
1187 /* try to send a reset command to the card to see if it
1189 memset(&cmd
, 0, sizeof (cmd
));
1190 status
= mxge_send_cmd(sc
, MXGEFW_CMD_RESET
, &cmd
);
1192 device_printf(sc
->dev
, "failed reset\n");
1196 mxge_dummy_rdma(sc
, 1);
1199 /* set the intrq size */
1200 cmd
.data0
= sc
->rx_ring_size
;
1201 status
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
1204 * Even though we already know how many slices are supported
1205 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
1206 * has magic side effects, and must be called after a reset.
1207 * It must be called prior to calling any RSS related cmds,
1208 * including assigning an interrupt queue for anything but
1209 * slice 0. It must also be called *after*
1210 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
1211 * the firmware to compute offsets.
1214 if (sc
->num_slices
> 1) {
1215 /* ask the maximum number of slices it supports */
1216 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
,
1219 device_printf(sc
->dev
,
1220 "failed to get number of slices\n");
1224 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
1225 * to setting up the interrupt queue DMA
1227 cmd
.data0
= sc
->num_slices
;
1228 cmd
.data1
= MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE
;
1229 #ifdef IFNET_BUF_RING
1230 cmd
.data1
|= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES
;
1232 status
= mxge_send_cmd(sc
, MXGEFW_CMD_ENABLE_RSS_QUEUES
,
1235 device_printf(sc
->dev
,
1236 "failed to set number of slices\n");
1242 if (interrupts_setup
) {
1243 /* Now exchange information about interrupts */
1244 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1245 rx_done
= &sc
->ss
[slice
].rx_done
;
1246 memset(rx_done
->entry
, 0, sc
->rx_ring_size
);
1247 cmd
.data0
= MXGE_LOWPART_TO_U32(rx_done
->dma
.bus_addr
);
1248 cmd
.data1
= MXGE_HIGHPART_TO_U32(rx_done
->dma
.bus_addr
);
1250 status
|= mxge_send_cmd(sc
,
1251 MXGEFW_CMD_SET_INTRQ_DMA
,
1256 status
|= mxge_send_cmd(sc
,
1257 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET
, &cmd
);
1260 sc
->intr_coal_delay_ptr
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1262 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_IRQ_ACK_OFFSET
, &cmd
);
1263 irq_claim
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1266 status
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET
,
1268 sc
->irq_deassert
= (volatile uint32_t *)(sc
->sram
+ cmd
.data0
);
1270 device_printf(sc
->dev
, "failed set interrupt parameters\n");
1275 *sc
->intr_coal_delay_ptr
= htobe32(sc
->intr_coal_delay
);
1278 /* run a DMA benchmark */
1279 (void) mxge_dma_test(sc
, MXGEFW_DMA_TEST
);
1281 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1282 ss
= &sc
->ss
[slice
];
1284 ss
->irq_claim
= irq_claim
+ (2 * slice
);
1285 /* reset mcp/driver shared state back to 0 */
1286 ss
->rx_done
.idx
= 0;
1287 ss
->rx_done
.cnt
= 0;
1290 ss
->tx
.pkt_done
= 0;
1291 ss
->tx
.queue_active
= 0;
1292 ss
->tx
.activate
= 0;
1293 ss
->tx
.deactivate
= 0;
1298 ss
->rx_small
.cnt
= 0;
1299 ss
->lro_bad_csum
= 0;
1301 ss
->lro_flushed
= 0;
1302 if (ss
->fw_stats
!= NULL
) {
1303 ss
->fw_stats
->valid
= 0;
1304 ss
->fw_stats
->send_done_count
= 0;
1307 sc
->rdma_tags_available
= 15;
1308 status
= mxge_update_mac_address(sc
);
1309 mxge_change_promisc(sc
, sc
->ifp
->if_flags
& IFF_PROMISC
);
1310 mxge_change_pause(sc
, sc
->pause
);
1311 mxge_set_multicast_list(sc
);
1316 mxge_change_intr_coal(SYSCTL_HANDLER_ARGS
)
1319 unsigned int intr_coal_delay
;
1323 intr_coal_delay
= sc
->intr_coal_delay
;
1324 err
= sysctl_handle_int(oidp
, &intr_coal_delay
, arg2
, req
);
1328 if (intr_coal_delay
== sc
->intr_coal_delay
)
1331 if (intr_coal_delay
== 0 || intr_coal_delay
> 1000*1000)
1334 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
1335 *sc
->intr_coal_delay_ptr
= htobe32(intr_coal_delay
);
1336 sc
->intr_coal_delay
= intr_coal_delay
;
1338 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
1343 mxge_change_flow_control(SYSCTL_HANDLER_ARGS
)
1346 unsigned int enabled
;
1350 enabled
= sc
->pause
;
1351 err
= sysctl_handle_int(oidp
, &enabled
, arg2
, req
);
1355 if (enabled
== sc
->pause
)
1358 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
1359 err
= mxge_change_pause(sc
, enabled
);
1360 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
1365 mxge_change_lro_locked(mxge_softc_t
*sc
, int lro_cnt
)
1372 ifp
->if_capenable
&= ~IFCAP_LRO
;
1374 ifp
->if_capenable
|= IFCAP_LRO
;
1375 sc
->lro_cnt
= lro_cnt
;
1376 if (ifp
->if_drv_flags
& IFF_DRV_RUNNING
) {
1378 err
= mxge_open(sc
);
1384 mxge_change_lro(SYSCTL_HANDLER_ARGS
)
1387 unsigned int lro_cnt
;
1391 lro_cnt
= sc
->lro_cnt
;
1392 err
= sysctl_handle_int(oidp
, &lro_cnt
, arg2
, req
);
1396 if (lro_cnt
== sc
->lro_cnt
)
1402 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
1403 err
= mxge_change_lro_locked(sc
, lro_cnt
);
1404 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
1409 mxge_handle_be32(SYSCTL_HANDLER_ARGS
)
1415 arg2
= be32toh(*(int *)arg1
);
1417 err
= sysctl_handle_int(oidp
, arg1
, arg2
, req
);
1423 mxge_rem_sysctls(mxge_softc_t
*sc
)
1425 struct mxge_slice_state
*ss
;
1428 if (sc
->slice_sysctl_tree
== NULL
)
1431 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1432 ss
= &sc
->ss
[slice
];
1433 if (ss
== NULL
|| ss
->sysctl_tree
== NULL
)
1435 sysctl_ctx_free(&ss
->sysctl_ctx
);
1436 ss
->sysctl_tree
= NULL
;
1438 sysctl_ctx_free(&sc
->slice_sysctl_ctx
);
1439 sc
->slice_sysctl_tree
= NULL
;
1443 mxge_add_sysctls(mxge_softc_t
*sc
)
1445 struct sysctl_ctx_list
*ctx
;
1446 struct sysctl_oid_list
*children
;
1448 struct mxge_slice_state
*ss
;
1452 ctx
= device_get_sysctl_ctx(sc
->dev
);
1453 children
= SYSCTL_CHILDREN(device_get_sysctl_tree(sc
->dev
));
1454 fw
= sc
->ss
[0].fw_stats
;
1456 /* random information */
1457 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
,
1459 CTLFLAG_RD
, &sc
->fw_version
,
1460 0, "firmware version");
1461 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
,
1463 CTLFLAG_RD
, &sc
->serial_number_string
,
1464 0, "serial number");
1465 SYSCTL_ADD_STRING(ctx
, children
, OID_AUTO
,
1467 CTLFLAG_RD
, &sc
->product_code_string
,
1469 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1471 CTLFLAG_RD
, &sc
->link_width
,
1473 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1475 CTLFLAG_RD
, &sc
->tx_boundary
,
1477 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1479 CTLFLAG_RD
, &sc
->wc
,
1480 0, "write combining PIO?");
1481 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1483 CTLFLAG_RD
, &sc
->read_dma
,
1484 0, "DMA Read speed in MB/s");
1485 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1487 CTLFLAG_RD
, &sc
->write_dma
,
1488 0, "DMA Write speed in MB/s");
1489 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1490 "read_write_dma_MBs",
1491 CTLFLAG_RD
, &sc
->read_write_dma
,
1492 0, "DMA concurrent Read/Write speed in MB/s");
1495 /* performance related tunables */
1496 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1498 CTLTYPE_INT
|CTLFLAG_RW
, sc
,
1499 0, mxge_change_intr_coal
,
1500 "I", "interrupt coalescing delay in usecs");
1502 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1503 "flow_control_enabled",
1504 CTLTYPE_INT
|CTLFLAG_RW
, sc
,
1505 0, mxge_change_flow_control
,
1506 "I", "interrupt coalescing delay in usecs");
1508 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1510 CTLFLAG_RW
, &mxge_deassert_wait
,
1511 0, "Wait for IRQ line to go low in ihandler");
1513 /* stats block from firmware is in network byte order.
1515 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1517 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->link_up
,
1518 0, mxge_handle_be32
,
1520 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1521 "rdma_tags_available",
1522 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->rdma_tags_available
,
1523 0, mxge_handle_be32
,
1524 "I", "rdma_tags_available");
1525 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1526 "dropped_bad_crc32",
1527 CTLTYPE_INT
|CTLFLAG_RD
,
1528 &fw
->dropped_bad_crc32
,
1529 0, mxge_handle_be32
,
1530 "I", "dropped_bad_crc32");
1531 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1533 CTLTYPE_INT
|CTLFLAG_RD
,
1534 &fw
->dropped_bad_phy
,
1535 0, mxge_handle_be32
,
1536 "I", "dropped_bad_phy");
1537 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1538 "dropped_link_error_or_filtered",
1539 CTLTYPE_INT
|CTLFLAG_RD
,
1540 &fw
->dropped_link_error_or_filtered
,
1541 0, mxge_handle_be32
,
1542 "I", "dropped_link_error_or_filtered");
1543 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1544 "dropped_link_overflow",
1545 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_link_overflow
,
1546 0, mxge_handle_be32
,
1547 "I", "dropped_link_overflow");
1548 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1549 "dropped_multicast_filtered",
1550 CTLTYPE_INT
|CTLFLAG_RD
,
1551 &fw
->dropped_multicast_filtered
,
1552 0, mxge_handle_be32
,
1553 "I", "dropped_multicast_filtered");
1554 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1555 "dropped_no_big_buffer",
1556 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_no_big_buffer
,
1557 0, mxge_handle_be32
,
1558 "I", "dropped_no_big_buffer");
1559 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1560 "dropped_no_small_buffer",
1561 CTLTYPE_INT
|CTLFLAG_RD
,
1562 &fw
->dropped_no_small_buffer
,
1563 0, mxge_handle_be32
,
1564 "I", "dropped_no_small_buffer");
1565 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1567 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_overrun
,
1568 0, mxge_handle_be32
,
1569 "I", "dropped_overrun");
1570 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1572 CTLTYPE_INT
|CTLFLAG_RD
,
1574 0, mxge_handle_be32
,
1575 "I", "dropped_pause");
1576 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1578 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_runt
,
1579 0, mxge_handle_be32
,
1580 "I", "dropped_runt");
1582 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1583 "dropped_unicast_filtered",
1584 CTLTYPE_INT
|CTLFLAG_RD
, &fw
->dropped_unicast_filtered
,
1585 0, mxge_handle_be32
,
1586 "I", "dropped_unicast_filtered");
1588 /* verbose printing? */
1589 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1591 CTLFLAG_RW
, &mxge_verbose
,
1592 0, "verbose printing");
1595 SYSCTL_ADD_PROC(ctx
, children
, OID_AUTO
,
1597 CTLTYPE_INT
|CTLFLAG_RW
, sc
,
1599 "I", "number of lro merge queues");
1602 /* add counters exported for debugging from all slices */
1603 sysctl_ctx_init(&sc
->slice_sysctl_ctx
);
1604 sc
->slice_sysctl_tree
=
1605 SYSCTL_ADD_NODE(&sc
->slice_sysctl_ctx
, children
, OID_AUTO
,
1606 "slice", CTLFLAG_RD
, 0, "");
1608 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
1609 ss
= &sc
->ss
[slice
];
1610 sysctl_ctx_init(&ss
->sysctl_ctx
);
1611 ctx
= &ss
->sysctl_ctx
;
1612 children
= SYSCTL_CHILDREN(sc
->slice_sysctl_tree
);
1613 sprintf(slice_num
, "%d", slice
);
1615 SYSCTL_ADD_NODE(ctx
, children
, OID_AUTO
, slice_num
,
1617 children
= SYSCTL_CHILDREN(ss
->sysctl_tree
);
1618 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1620 CTLFLAG_RD
, &ss
->rx_small
.cnt
,
1622 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1624 CTLFLAG_RD
, &ss
->rx_big
.cnt
,
1626 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1627 "lro_flushed", CTLFLAG_RD
, &ss
->lro_flushed
,
1628 0, "number of lro merge queues flushed");
1630 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1631 "lro_queued", CTLFLAG_RD
, &ss
->lro_queued
,
1632 0, "number of frames appended to lro merge"
1635 #ifndef IFNET_BUF_RING
1636 /* only transmit from slice 0 for now */
1640 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1642 CTLFLAG_RD
, &ss
->tx
.req
,
1645 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1647 CTLFLAG_RD
, &ss
->tx
.done
,
1649 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1651 CTLFLAG_RD
, &ss
->tx
.pkt_done
,
1653 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1655 CTLFLAG_RD
, &ss
->tx
.stall
,
1657 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1659 CTLFLAG_RD
, &ss
->tx
.wake
,
1661 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1663 CTLFLAG_RD
, &ss
->tx
.defrag
,
1665 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1667 CTLFLAG_RD
, &ss
->tx
.queue_active
,
1668 0, "tx_queue_active");
1669 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1671 CTLFLAG_RD
, &ss
->tx
.activate
,
1673 SYSCTL_ADD_INT(ctx
, children
, OID_AUTO
,
1675 CTLFLAG_RD
, &ss
->tx
.deactivate
,
1676 0, "tx_deactivate");
1680 /* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1681 backwards one at a time and handle ring wraps */
1684 mxge_submit_req_backwards(mxge_tx_ring_t
*tx
,
1685 mcp_kreq_ether_send_t
*src
, int cnt
)
1687 int idx
, starting_slot
;
1688 starting_slot
= tx
->req
;
1691 idx
= (starting_slot
+ cnt
) & tx
->mask
;
1692 mxge_pio_copy(&tx
->lanai
[idx
],
1693 &src
[cnt
], sizeof(*src
));
1699 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy
1700 * at most 32 bytes at a time, so as to avoid involving the software
1701 * pio handler in the nic. We re-write the first segment's flags
1702 * to mark them valid only after writing the entire chain
1706 mxge_submit_req(mxge_tx_ring_t
*tx
, mcp_kreq_ether_send_t
*src
,
1711 volatile uint32_t *dst_ints
;
1712 mcp_kreq_ether_send_t
*srcp
;
1713 volatile mcp_kreq_ether_send_t
*dstp
, *dst
;
1716 idx
= tx
->req
& tx
->mask
;
1718 last_flags
= src
->flags
;
1721 dst
= dstp
= &tx
->lanai
[idx
];
1724 if ((idx
+ cnt
) < tx
->mask
) {
1725 for (i
= 0; i
< (cnt
- 1); i
+= 2) {
1726 mxge_pio_copy(dstp
, srcp
, 2 * sizeof(*src
));
1727 wmb(); /* force write every 32 bytes */
1732 /* submit all but the first request, and ensure
1733 that it is submitted below */
1734 mxge_submit_req_backwards(tx
, src
, cnt
);
1738 /* submit the first request */
1739 mxge_pio_copy(dstp
, srcp
, sizeof(*src
));
1740 wmb(); /* barrier before setting valid flag */
1743 /* re-write the last 32-bits with the valid flags */
1744 src
->flags
= last_flags
;
1745 src_ints
= (uint32_t *)src
;
1747 dst_ints
= (volatile uint32_t *)dst
;
1749 *dst_ints
= *src_ints
;
1757 mxge_encap_tso(struct mxge_slice_state
*ss
, struct mbuf
*m
,
1758 int busdma_seg_cnt
, int ip_off
)
1761 mcp_kreq_ether_send_t
*req
;
1762 bus_dma_segment_t
*seg
;
1765 uint32_t low
, high_swapped
;
1766 int len
, seglen
, cum_len
, cum_len_next
;
1767 int next_is_first
, chop
, cnt
, rdma_count
, small
;
1768 uint16_t pseudo_hdr_offset
, cksum_offset
, mss
;
1769 uint8_t flags
, flags_next
;
1772 mss
= m
->m_pkthdr
.tso_segsz
;
1774 /* negative cum_len signifies to the
1775 * send loop that we are still in the
1776 * header portion of the TSO packet.
1779 /* ensure we have the ethernet, IP and TCP
1780 header together in the first mbuf, copy
1781 it to a scratch buffer if not */
1782 if (__predict_false(m
->m_len
< ip_off
+ sizeof (*ip
))) {
1783 m_copydata(m
, 0, ip_off
+ sizeof (*ip
),
1785 ip
= (struct ip
*)(ss
->scratch
+ ip_off
);
1787 ip
= (struct ip
*)(mtod(m
, char *) + ip_off
);
1789 if (__predict_false(m
->m_len
< ip_off
+ (ip
->ip_hl
<< 2)
1791 m_copydata(m
, 0, ip_off
+ (ip
->ip_hl
<< 2)
1792 + sizeof (*tcp
), ss
->scratch
);
1793 ip
= (struct ip
*)(mtod(m
, char *) + ip_off
);
1796 tcp
= (struct tcphdr
*)((char *)ip
+ (ip
->ip_hl
<< 2));
1797 cum_len
= -(ip_off
+ ((ip
->ip_hl
+ tcp
->th_off
) << 2));
1799 /* TSO implies checksum offload on this hardware */
1800 cksum_offset
= ip_off
+ (ip
->ip_hl
<< 2);
1801 flags
= MXGEFW_FLAGS_TSO_HDR
| MXGEFW_FLAGS_FIRST
;
1804 /* for TSO, pseudo_hdr_offset holds mss.
1805 * The firmware figures out where to put
1806 * the checksum by parsing the header. */
1807 pseudo_hdr_offset
= htobe16(mss
);
1814 /* "rdma_count" is the number of RDMAs belonging to the
1815 * current packet BEFORE the current send request. For
1816 * non-TSO packets, this is equal to "count".
1817 * For TSO packets, rdma_count needs to be reset
1818 * to 0 after a segment cut.
1820 * The rdma_count field of the send request is
1821 * the number of RDMAs of the packet starting at
1822 * that request. For TSO send requests with one ore more cuts
1823 * in the middle, this is the number of RDMAs starting
1824 * after the last cut in the request. All previous
1825 * segments before the last cut implicitly have 1 RDMA.
1827 * Since the number of RDMAs is not known beforehand,
1828 * it must be filled-in retroactively - after each
1829 * segmentation cut or at the end of the entire packet.
1832 while (busdma_seg_cnt
) {
1833 /* Break the busdma segment up into pieces*/
1834 low
= MXGE_LOWPART_TO_U32(seg
->ds_addr
);
1835 high_swapped
= htobe32(MXGE_HIGHPART_TO_U32(seg
->ds_addr
));
1839 flags_next
= flags
& ~MXGEFW_FLAGS_FIRST
;
1841 cum_len_next
= cum_len
+ seglen
;
1842 (req
-rdma_count
)->rdma_count
= rdma_count
+ 1;
1843 if (__predict_true(cum_len
>= 0)) {
1845 chop
= (cum_len_next
> mss
);
1846 cum_len_next
= cum_len_next
% mss
;
1847 next_is_first
= (cum_len_next
== 0);
1848 flags
|= chop
* MXGEFW_FLAGS_TSO_CHOP
;
1849 flags_next
|= next_is_first
*
1851 rdma_count
|= -(chop
| next_is_first
);
1852 rdma_count
+= chop
& !next_is_first
;
1853 } else if (cum_len_next
>= 0) {
1858 small
= (mss
<= MXGEFW_SEND_SMALL_SIZE
);
1859 flags_next
= MXGEFW_FLAGS_TSO_PLD
|
1860 MXGEFW_FLAGS_FIRST
|
1861 (small
* MXGEFW_FLAGS_SMALL
);
1864 req
->addr_high
= high_swapped
;
1865 req
->addr_low
= htobe32(low
);
1866 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
1868 req
->rdma_count
= 1;
1869 req
->length
= htobe16(seglen
);
1870 req
->cksum_offset
= cksum_offset
;
1871 req
->flags
= flags
| ((cum_len
& 1) *
1872 MXGEFW_FLAGS_ALIGN_ODD
);
1875 cum_len
= cum_len_next
;
1880 if (__predict_false(cksum_offset
> seglen
))
1881 cksum_offset
-= seglen
;
1884 if (__predict_false(cnt
> tx
->max_desc
))
1890 (req
-rdma_count
)->rdma_count
= rdma_count
;
1894 req
->flags
|= MXGEFW_FLAGS_TSO_LAST
;
1895 } while (!(req
->flags
& (MXGEFW_FLAGS_TSO_CHOP
| MXGEFW_FLAGS_FIRST
)));
1897 tx
->info
[((cnt
- 1) + tx
->req
) & tx
->mask
].flag
= 1;
1898 mxge_submit_req(tx
, tx
->req_list
, cnt
);
1899 #ifdef IFNET_BUF_RING
1900 if ((ss
->sc
->num_slices
> 1) && tx
->queue_active
== 0) {
1901 /* tell the NIC to start polling this slice */
1903 tx
->queue_active
= 1;
1911 bus_dmamap_unload(tx
->dmat
, tx
->info
[tx
->req
& tx
->mask
].map
);
1915 printf("tx->max_desc exceeded via TSO!\n");
1916 printf("mss = %d, %ld, %d!\n", mss
,
1917 (long)seg
- (long)tx
->seg_list
, tx
->max_desc
);
1924 #endif /* IFCAP_TSO4 */
1926 #ifdef MXGE_NEW_VLAN_API
1928 * We reproduce the software vlan tag insertion from
1929 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
1930 * vlan tag insertion. We need to advertise this in order to have the
1931 * vlan interface respect our csum offload flags.
1933 static struct mbuf
*
1934 mxge_vlan_tag_insert(struct mbuf
*m
)
1936 struct ether_vlan_header
*evl
;
1938 M_PREPEND(m
, ETHER_VLAN_ENCAP_LEN
, M_DONTWAIT
);
1939 if (__predict_false(m
== NULL
))
1941 if (m
->m_len
< sizeof(*evl
)) {
1942 m
= m_pullup(m
, sizeof(*evl
));
1943 if (__predict_false(m
== NULL
))
1947 * Transform the Ethernet header into an Ethernet header
1948 * with 802.1Q encapsulation.
1950 evl
= mtod(m
, struct ether_vlan_header
*);
1951 bcopy((char *)evl
+ ETHER_VLAN_ENCAP_LEN
,
1952 (char *)evl
, ETHER_HDR_LEN
- ETHER_TYPE_LEN
);
1953 evl
->evl_encap_proto
= htons(ETHERTYPE_VLAN
);
1954 evl
->evl_tag
= htons(m
->m_pkthdr
.ether_vtag
);
1955 m
->m_flags
&= ~M_VLANTAG
;
1958 #endif /* MXGE_NEW_VLAN_API */
1961 mxge_encap(struct mxge_slice_state
*ss
, struct mbuf
*m
)
1964 mcp_kreq_ether_send_t
*req
;
1965 bus_dma_segment_t
*seg
;
1970 int cnt
, cum_len
, err
, i
, idx
, odd_flag
, ip_off
;
1971 uint16_t pseudo_hdr_offset
;
1972 uint8_t flags
, cksum_offset
;
1979 ip_off
= sizeof (struct ether_header
);
1980 #ifdef MXGE_NEW_VLAN_API
1981 if (m
->m_flags
& M_VLANTAG
) {
1982 m
= mxge_vlan_tag_insert(m
);
1983 if (__predict_false(m
== NULL
))
1985 ip_off
+= ETHER_VLAN_ENCAP_LEN
;
1988 /* (try to) map the frame for DMA */
1989 idx
= tx
->req
& tx
->mask
;
1990 err
= bus_dmamap_load_mbuf_sg(tx
->dmat
, tx
->info
[idx
].map
,
1991 m
, tx
->seg_list
, &cnt
,
1993 if (__predict_false(err
== EFBIG
)) {
1994 /* Too many segments in the chain. Try
1996 m_tmp
= m_defrag(m
, M_NOWAIT
);
1997 if (m_tmp
== NULL
) {
2002 err
= bus_dmamap_load_mbuf_sg(tx
->dmat
,
2004 m
, tx
->seg_list
, &cnt
,
2007 if (__predict_false(err
!= 0)) {
2008 device_printf(sc
->dev
, "bus_dmamap_load_mbuf_sg returned %d"
2009 " packet len = %d\n", err
, m
->m_pkthdr
.len
);
2012 bus_dmamap_sync(tx
->dmat
, tx
->info
[idx
].map
,
2013 BUS_DMASYNC_PREWRITE
);
2014 tx
->info
[idx
].m
= m
;
2017 /* TSO is different enough, we handle it in another routine */
2018 if (m
->m_pkthdr
.csum_flags
& (CSUM_TSO
)) {
2019 mxge_encap_tso(ss
, m
, cnt
, ip_off
);
2026 pseudo_hdr_offset
= 0;
2027 flags
= MXGEFW_FLAGS_NO_TSO
;
2029 /* checksum offloading? */
2030 if (m
->m_pkthdr
.csum_flags
& (CSUM_DELAY_DATA
)) {
2031 /* ensure ip header is in first mbuf, copy
2032 it to a scratch buffer if not */
2033 if (__predict_false(m
->m_len
< ip_off
+ sizeof (*ip
))) {
2034 m_copydata(m
, 0, ip_off
+ sizeof (*ip
),
2036 ip
= (struct ip
*)(ss
->scratch
+ ip_off
);
2038 ip
= (struct ip
*)(mtod(m
, char *) + ip_off
);
2040 cksum_offset
= ip_off
+ (ip
->ip_hl
<< 2);
2041 pseudo_hdr_offset
= cksum_offset
+ m
->m_pkthdr
.csum_data
;
2042 pseudo_hdr_offset
= htobe16(pseudo_hdr_offset
);
2043 req
->cksum_offset
= cksum_offset
;
2044 flags
|= MXGEFW_FLAGS_CKSUM
;
2045 odd_flag
= MXGEFW_FLAGS_ALIGN_ODD
;
2049 if (m
->m_pkthdr
.len
< MXGEFW_SEND_SMALL_SIZE
)
2050 flags
|= MXGEFW_FLAGS_SMALL
;
2052 /* convert segments into a request list */
2055 req
->flags
= MXGEFW_FLAGS_FIRST
;
2056 for (i
= 0; i
< cnt
; i
++) {
2058 htobe32(MXGE_LOWPART_TO_U32(seg
->ds_addr
));
2060 htobe32(MXGE_HIGHPART_TO_U32(seg
->ds_addr
));
2061 req
->length
= htobe16(seg
->ds_len
);
2062 req
->cksum_offset
= cksum_offset
;
2063 if (cksum_offset
> seg
->ds_len
)
2064 cksum_offset
-= seg
->ds_len
;
2067 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
2068 req
->pad
= 0; /* complete solid 16-byte block */
2069 req
->rdma_count
= 1;
2070 req
->flags
|= flags
| ((cum_len
& 1) * odd_flag
);
2071 cum_len
+= seg
->ds_len
;
2077 /* pad runts to 60 bytes */
2081 htobe32(MXGE_LOWPART_TO_U32(sc
->zeropad_dma
.bus_addr
));
2083 htobe32(MXGE_HIGHPART_TO_U32(sc
->zeropad_dma
.bus_addr
));
2084 req
->length
= htobe16(60 - cum_len
);
2085 req
->cksum_offset
= 0;
2086 req
->pseudo_hdr_offset
= pseudo_hdr_offset
;
2087 req
->pad
= 0; /* complete solid 16-byte block */
2088 req
->rdma_count
= 1;
2089 req
->flags
|= flags
| ((cum_len
& 1) * odd_flag
);
2093 tx
->req_list
[0].rdma_count
= cnt
;
2095 /* print what the firmware will see */
2096 for (i
= 0; i
< cnt
; i
++) {
2097 printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
2098 "cso:%d, flags:0x%x, rdma:%d\n",
2099 i
, (int)ntohl(tx
->req_list
[i
].addr_high
),
2100 (int)ntohl(tx
->req_list
[i
].addr_low
),
2101 (int)ntohs(tx
->req_list
[i
].length
),
2102 (int)ntohs(tx
->req_list
[i
].pseudo_hdr_offset
),
2103 tx
->req_list
[i
].cksum_offset
, tx
->req_list
[i
].flags
,
2104 tx
->req_list
[i
].rdma_count
);
2106 printf("--------------\n");
2108 tx
->info
[((cnt
- 1) + tx
->req
) & tx
->mask
].flag
= 1;
2109 mxge_submit_req(tx
, tx
->req_list
, cnt
);
2110 #ifdef IFNET_BUF_RING
2111 if ((ss
->sc
->num_slices
> 1) && tx
->queue_active
== 0) {
2112 /* tell the NIC to start polling this slice */
2114 tx
->queue_active
= 1;
2127 #ifdef IFNET_BUF_RING
2129 mxge_qflush(struct ifnet
*ifp
)
2131 mxge_softc_t
*sc
= ifp
->if_softc
;
2136 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
2137 tx
= &sc
->ss
[slice
].tx
;
2138 lockmgr(&tx
->lock
, LK_EXCLUSIVE
);
2139 while ((m
= buf_ring_dequeue_sc(tx
->br
)) != NULL
)
2141 lockmgr(&tx
->lock
, LK_RELEASE
);
2147 mxge_start_locked(struct mxge_slice_state
*ss
)
2158 while ((tx
->mask
- (tx
->req
- tx
->done
)) > tx
->max_desc
) {
2159 m
= drbr_dequeue(ifp
, tx
->br
);
2163 /* let BPF see it */
2166 /* give it to the nic */
2169 /* ran out of transmit slots */
2170 if (((ss
->if_drv_flags
& IFF_DRV_OACTIVE
) == 0)
2171 && (!drbr_empty(ifp
, tx
->br
))) {
2172 ss
->if_drv_flags
|= IFF_DRV_OACTIVE
;
2178 mxge_transmit_locked(struct mxge_slice_state
*ss
, struct mbuf
*m
)
2189 if ((ss
->if_drv_flags
& (IFF_DRV_RUNNING
|IFF_DRV_OACTIVE
)) !=
2191 err
= drbr_enqueue(ifp
, tx
->br
, m
);
2195 if (drbr_empty(ifp
, tx
->br
) &&
2196 ((tx
->mask
- (tx
->req
- tx
->done
)) > tx
->max_desc
)) {
2197 /* let BPF see it */
2199 /* give it to the nic */
2201 } else if ((err
= drbr_enqueue(ifp
, tx
->br
, m
)) != 0) {
2204 if (!drbr_empty(ifp
, tx
->br
))
2205 mxge_start_locked(ss
);
2210 mxge_transmit(struct ifnet
*ifp
, struct mbuf
*m
)
2212 mxge_softc_t
*sc
= ifp
->if_softc
;
2213 struct mxge_slice_state
*ss
;
2218 slice
= m
->m_pkthdr
.flowid
;
2219 slice
&= (sc
->num_slices
- 1); /* num_slices always power of 2 */
2221 ss
= &sc
->ss
[slice
];
2224 if (lockmgr(&tx
->lock
, LK_EXCLUSIVE
|LK_NOWAIT
)) {
2225 err
= mxge_transmit_locked(ss
, m
);
2226 lockmgr(&tx
->lock
, LK_RELEASE
);
2228 err
= drbr_enqueue(ifp
, tx
->br
, m
);
2237 mxge_start_locked(struct mxge_slice_state
*ss
)
2247 while ((tx
->mask
- (tx
->req
- tx
->done
)) > tx
->max_desc
) {
2248 IFQ_DRV_DEQUEUE(&ifp
->if_snd
, m
);
2252 /* let BPF see it */
2255 /* give it to the nic */
2258 /* ran out of transmit slots */
2259 if ((sc
->ifp
->if_drv_flags
& IFF_DRV_OACTIVE
) == 0) {
2260 sc
->ifp
->if_drv_flags
|= IFF_DRV_OACTIVE
;
2266 mxge_start(struct ifnet
*ifp
)
2268 mxge_softc_t
*sc
= ifp
->if_softc
;
2269 struct mxge_slice_state
*ss
;
2271 /* only use the first slice for now */
2273 lockmgr(&ss
->tx
.lock
, LK_EXCLUSIVE
);
2274 mxge_start_locked(ss
);
2275 lockmgr(&ss
->tx
.lock
, LK_RELEASE
);
2279 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy
2280 * at most 32 bytes at a time, so as to avoid involving the software
2281 * pio handler in the nic. We re-write the first segment's low
2282 * DMA address to mark it valid only after we write the entire chunk
2286 mxge_submit_8rx(volatile mcp_kreq_ether_recv_t
*dst
,
2287 mcp_kreq_ether_recv_t
*src
)
2291 low
= src
->addr_low
;
2292 src
->addr_low
= 0xffffffff;
2293 mxge_pio_copy(dst
, src
, 4 * sizeof (*src
));
2295 mxge_pio_copy(dst
+ 4, src
+ 4, 4 * sizeof (*src
));
2297 src
->addr_low
= low
;
2298 dst
->addr_low
= low
;
2303 mxge_get_buf_small(struct mxge_slice_state
*ss
, bus_dmamap_t map
, int idx
)
2305 bus_dma_segment_t seg
;
2307 mxge_rx_ring_t
*rx
= &ss
->rx_small
;
2310 m
= m_gethdr(M_DONTWAIT
, MT_DATA
);
2317 err
= bus_dmamap_load_mbuf_sg(rx
->dmat
, map
, m
,
2318 &seg
, &cnt
, BUS_DMA_NOWAIT
);
2323 rx
->info
[idx
].m
= m
;
2324 rx
->shadow
[idx
].addr_low
=
2325 htobe32(MXGE_LOWPART_TO_U32(seg
.ds_addr
));
2326 rx
->shadow
[idx
].addr_high
=
2327 htobe32(MXGE_HIGHPART_TO_U32(seg
.ds_addr
));
2331 mxge_submit_8rx(&rx
->lanai
[idx
- 7], &rx
->shadow
[idx
- 7]);
2336 mxge_get_buf_big(struct mxge_slice_state
*ss
, bus_dmamap_t map
, int idx
)
2338 bus_dma_segment_t seg
[3];
2340 mxge_rx_ring_t
*rx
= &ss
->rx_big
;
2343 if (rx
->cl_size
== MCLBYTES
)
2344 m
= m_getcl(M_DONTWAIT
, MT_DATA
, M_PKTHDR
);
2346 m
= m_getjcl(M_DONTWAIT
, MT_DATA
, M_PKTHDR
, rx
->cl_size
);
2352 m
->m_len
= rx
->mlen
;
2353 err
= bus_dmamap_load_mbuf_sg(rx
->dmat
, map
, m
,
2354 seg
, &cnt
, BUS_DMA_NOWAIT
);
2359 rx
->info
[idx
].m
= m
;
2360 rx
->shadow
[idx
].addr_low
=
2361 htobe32(MXGE_LOWPART_TO_U32(seg
->ds_addr
));
2362 rx
->shadow
[idx
].addr_high
=
2363 htobe32(MXGE_HIGHPART_TO_U32(seg
->ds_addr
));
2365 #if MXGE_VIRT_JUMBOS
2366 for (i
= 1; i
< cnt
; i
++) {
2367 rx
->shadow
[idx
+ i
].addr_low
=
2368 htobe32(MXGE_LOWPART_TO_U32(seg
[i
].ds_addr
));
2369 rx
->shadow
[idx
+ i
].addr_high
=
2370 htobe32(MXGE_HIGHPART_TO_U32(seg
[i
].ds_addr
));
2375 for (i
= 0; i
< rx
->nbufs
; i
++) {
2376 if ((idx
& 7) == 7) {
2377 mxge_submit_8rx(&rx
->lanai
[idx
- 7],
2378 &rx
->shadow
[idx
- 7]);
2386 * Myri10GE hardware checksums are not valid if the sender
2387 * padded the frame with non-zero padding. This is because
2388 * the firmware just does a simple 16-bit 1s complement
2389 * checksum across the entire frame, excluding the first 14
2390 * bytes. It is best to simply to check the checksum and
2391 * tell the stack about it only if the checksum is good
2394 static inline uint16_t
2395 mxge_rx_csum(struct mbuf
*m
, int csum
)
2397 struct ether_header
*eh
;
2401 eh
= mtod(m
, struct ether_header
*);
2403 /* only deal with IPv4 TCP & UDP for now */
2404 if (__predict_false(eh
->ether_type
!= htons(ETHERTYPE_IP
)))
2406 ip
= (struct ip
*)(eh
+ 1);
2407 if (__predict_false(ip
->ip_p
!= IPPROTO_TCP
&&
2408 ip
->ip_p
!= IPPROTO_UDP
))
2411 c
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
,
2412 htonl(ntohs(csum
) + ntohs(ip
->ip_len
) +
2413 - (ip
->ip_hl
<< 2) + ip
->ip_p
));
2422 mxge_vlan_tag_remove(struct mbuf
*m
, uint32_t *csum
)
2424 struct ether_vlan_header
*evl
;
2425 struct ether_header
*eh
;
2428 evl
= mtod(m
, struct ether_vlan_header
*);
2429 eh
= mtod(m
, struct ether_header
*);
2432 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
2433 * after what the firmware thought was the end of the ethernet
2437 /* put checksum into host byte order */
2438 *csum
= ntohs(*csum
);
2439 partial
= ntohl(*(uint32_t *)(mtod(m
, char *) + ETHER_HDR_LEN
));
2440 (*csum
) += ~partial
;
2441 (*csum
) += ((*csum
) < ~partial
);
2442 (*csum
) = ((*csum
) >> 16) + ((*csum
) & 0xFFFF);
2443 (*csum
) = ((*csum
) >> 16) + ((*csum
) & 0xFFFF);
2445 /* restore checksum to network byte order;
2446 later consumers expect this */
2447 *csum
= htons(*csum
);
2450 #ifdef MXGE_NEW_VLAN_API
2451 m
->m_pkthdr
.ether_vtag
= ntohs(evl
->evl_tag
);
2455 mtag
= m_tag_alloc(MTAG_VLAN
, MTAG_VLAN_TAG
, sizeof(u_int
),
2459 VLAN_TAG_VALUE(mtag
) = ntohs(evl
->evl_tag
);
2460 m_tag_prepend(m
, mtag
);
2464 m
->m_flags
|= M_VLANTAG
;
2467 * Remove the 802.1q header by copying the Ethernet
2468 * addresses over it and adjusting the beginning of
2469 * the data in the mbuf. The encapsulated Ethernet
2470 * type field is already in place.
2472 bcopy((char *)evl
, (char *)evl
+ ETHER_VLAN_ENCAP_LEN
,
2473 ETHER_HDR_LEN
- ETHER_TYPE_LEN
);
2474 m_adj(m
, ETHER_VLAN_ENCAP_LEN
);
2479 mxge_rx_done_big(struct mxge_slice_state
*ss
, uint32_t len
, uint32_t csum
)
2484 struct ether_header
*eh
;
2486 bus_dmamap_t old_map
;
2488 uint16_t tcpudp_csum
;
2493 idx
= rx
->cnt
& rx
->mask
;
2494 rx
->cnt
+= rx
->nbufs
;
2495 /* save a pointer to the received mbuf */
2496 m
= rx
->info
[idx
].m
;
2497 /* try to replace the received mbuf */
2498 if (mxge_get_buf_big(ss
, rx
->extra_map
, idx
)) {
2499 /* drop the frame -- the old mbuf is re-cycled */
2504 /* unmap the received buffer */
2505 old_map
= rx
->info
[idx
].map
;
2506 bus_dmamap_sync(rx
->dmat
, old_map
, BUS_DMASYNC_POSTREAD
);
2507 bus_dmamap_unload(rx
->dmat
, old_map
);
2509 /* swap the bus_dmamap_t's */
2510 rx
->info
[idx
].map
= rx
->extra_map
;
2511 rx
->extra_map
= old_map
;
2513 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2515 m
->m_data
+= MXGEFW_PAD
;
2517 m
->m_pkthdr
.rcvif
= ifp
;
2518 m
->m_len
= m
->m_pkthdr
.len
= len
;
2520 eh
= mtod(m
, struct ether_header
*);
2521 if (eh
->ether_type
== htons(ETHERTYPE_VLAN
)) {
2522 mxge_vlan_tag_remove(m
, &csum
);
2524 /* if the checksum is valid, mark it in the mbuf header */
2525 if (sc
->csum_flag
&& (0 == (tcpudp_csum
= mxge_rx_csum(m
, csum
)))) {
2526 if (sc
->lro_cnt
&& (0 == mxge_lro_rx(ss
, m
, csum
)))
2528 /* otherwise, it was a UDP frame, or a TCP frame which
2529 we could not do LRO on. Tell the stack that the
2531 m
->m_pkthdr
.csum_data
= 0xffff;
2532 m
->m_pkthdr
.csum_flags
= CSUM_PSEUDO_HDR
| CSUM_DATA_VALID
;
2534 /* flowid only valid if RSS hashing is enabled */
2535 if (sc
->num_slices
> 1) {
2536 m
->m_pkthdr
.flowid
= (ss
- sc
->ss
);
2537 m
->m_flags
|= M_FLOWID
;
2539 /* pass the frame up the stack */
2540 (*ifp
->if_input
)(ifp
, m
);
2544 mxge_rx_done_small(struct mxge_slice_state
*ss
, uint32_t len
, uint32_t csum
)
2548 struct ether_header
*eh
;
2551 bus_dmamap_t old_map
;
2553 uint16_t tcpudp_csum
;
2558 idx
= rx
->cnt
& rx
->mask
;
2560 /* save a pointer to the received mbuf */
2561 m
= rx
->info
[idx
].m
;
2562 /* try to replace the received mbuf */
2563 if (mxge_get_buf_small(ss
, rx
->extra_map
, idx
)) {
2564 /* drop the frame -- the old mbuf is re-cycled */
2569 /* unmap the received buffer */
2570 old_map
= rx
->info
[idx
].map
;
2571 bus_dmamap_sync(rx
->dmat
, old_map
, BUS_DMASYNC_POSTREAD
);
2572 bus_dmamap_unload(rx
->dmat
, old_map
);
2574 /* swap the bus_dmamap_t's */
2575 rx
->info
[idx
].map
= rx
->extra_map
;
2576 rx
->extra_map
= old_map
;
2578 /* mcp implicitly skips 1st 2 bytes so that packet is properly
2580 m
->m_data
+= MXGEFW_PAD
;
2582 m
->m_pkthdr
.rcvif
= ifp
;
2583 m
->m_len
= m
->m_pkthdr
.len
= len
;
2585 eh
= mtod(m
, struct ether_header
*);
2586 if (eh
->ether_type
== htons(ETHERTYPE_VLAN
)) {
2587 mxge_vlan_tag_remove(m
, &csum
);
2589 /* if the checksum is valid, mark it in the mbuf header */
2590 if (sc
->csum_flag
&& (0 == (tcpudp_csum
= mxge_rx_csum(m
, csum
)))) {
2591 if (sc
->lro_cnt
&& (0 == mxge_lro_rx(ss
, m
, csum
)))
2593 /* otherwise, it was a UDP frame, or a TCP frame which
2594 we could not do LRO on. Tell the stack that the
2596 m
->m_pkthdr
.csum_data
= 0xffff;
2597 m
->m_pkthdr
.csum_flags
= CSUM_PSEUDO_HDR
| CSUM_DATA_VALID
;
2599 /* flowid only valid if RSS hashing is enabled */
2600 if (sc
->num_slices
> 1) {
2601 m
->m_pkthdr
.flowid
= (ss
- sc
->ss
);
2602 m
->m_flags
|= M_FLOWID
;
2604 /* pass the frame up the stack */
2605 (*ifp
->if_input
)(ifp
, m
);
2609 mxge_clean_rx_done(struct mxge_slice_state
*ss
)
2611 mxge_rx_done_t
*rx_done
= &ss
->rx_done
;
2617 while (rx_done
->entry
[rx_done
->idx
].length
!= 0) {
2618 length
= ntohs(rx_done
->entry
[rx_done
->idx
].length
);
2619 rx_done
->entry
[rx_done
->idx
].length
= 0;
2620 checksum
= rx_done
->entry
[rx_done
->idx
].checksum
;
2621 if (length
<= (MHLEN
- MXGEFW_PAD
))
2622 mxge_rx_done_small(ss
, length
, checksum
);
2624 mxge_rx_done_big(ss
, length
, checksum
);
2626 rx_done
->idx
= rx_done
->cnt
& rx_done
->mask
;
2628 /* limit potential for livelock */
2629 if (__predict_false(++limit
> rx_done
->mask
/ 2))
2633 while (!SLIST_EMPTY(&ss
->lro_active
)) {
2634 struct lro_entry
*lro
= SLIST_FIRST(&ss
->lro_active
);
2635 SLIST_REMOVE_HEAD(&ss
->lro_active
, next
);
2636 mxge_lro_flush(ss
, lro
);
2643 mxge_tx_done(struct mxge_slice_state
*ss
, uint32_t mcp_idx
)
2654 while (tx
->pkt_done
!= mcp_idx
) {
2655 idx
= tx
->done
& tx
->mask
;
2657 m
= tx
->info
[idx
].m
;
2658 /* mbuf and DMA map only attached to the first
2661 ss
->obytes
+= m
->m_pkthdr
.len
;
2662 if (m
->m_flags
& M_MCAST
)
2665 tx
->info
[idx
].m
= NULL
;
2666 map
= tx
->info
[idx
].map
;
2667 bus_dmamap_unload(tx
->dmat
, map
);
2670 if (tx
->info
[idx
].flag
) {
2671 tx
->info
[idx
].flag
= 0;
2676 /* If we have space, clear IFF_OACTIVE to tell the stack that
2677 its OK to send packets */
2678 #ifdef IFNET_BUF_RING
2679 flags
= &ss
->if_drv_flags
;
2681 flags
= &ifp
->if_drv_flags
;
2683 lockmgr(&ss
->tx
.lock
, LK_EXCLUSIVE
);
2684 if ((*flags
) & IFF_DRV_OACTIVE
&&
2685 tx
->req
- tx
->done
< (tx
->mask
+ 1)/4) {
2686 *(flags
) &= ~IFF_DRV_OACTIVE
;
2688 mxge_start_locked(ss
);
2690 #ifdef IFNET_BUF_RING
2691 if ((ss
->sc
->num_slices
> 1) && (tx
->req
== tx
->done
)) {
2692 /* let the NIC stop polling this queue, since there
2693 * are no more transmits pending */
2694 if (tx
->req
== tx
->done
) {
2696 tx
->queue_active
= 0;
2702 lockmgr(&ss
->tx
.lock
, LK_RELEASE
);
2706 static struct mxge_media_type mxge_xfp_media_types
[] =
2708 {IFM_10G_CX4
, 0x7f, "10GBASE-CX4 (module)"},
2709 {IFM_10G_SR
, (1 << 7), "10GBASE-SR"},
2710 {IFM_10G_LR
, (1 << 6), "10GBASE-LR"},
2711 {0, (1 << 5), "10GBASE-ER"},
2712 {IFM_10G_LRM
, (1 << 4), "10GBASE-LRM"},
2713 {0, (1 << 3), "10GBASE-SW"},
2714 {0, (1 << 2), "10GBASE-LW"},
2715 {0, (1 << 1), "10GBASE-EW"},
2716 {0, (1 << 0), "Reserved"}
2718 static struct mxge_media_type mxge_sfp_media_types
[] =
2720 {0, (1 << 7), "Reserved"},
2721 {IFM_10G_LRM
, (1 << 6), "10GBASE-LRM"},
2722 {IFM_10G_LR
, (1 << 5), "10GBASE-LR"},
2723 {IFM_10G_SR
, (1 << 4), "10GBASE-SR"}
2727 mxge_set_media(mxge_softc_t
*sc
, int type
)
2729 sc
->media_flags
|= type
;
2730 ifmedia_add(&sc
->media
, sc
->media_flags
, 0, NULL
);
2731 ifmedia_set(&sc
->media
, sc
->media_flags
);
2736 * Determine the media type for a NIC. Some XFPs will identify
2737 * themselves only when their link is up, so this is initiated via a
2738 * link up interrupt. However, this can potentially take up to
2739 * several milliseconds, so it is run via the watchdog routine, rather
2740 * than in the interrupt handler itself. This need only be done
2741 * once, not each time the link is up.
2744 mxge_media_probe(mxge_softc_t
*sc
)
2749 struct mxge_media_type
*mxge_media_types
= NULL
;
2750 int i
, err
, ms
, mxge_media_type_entries
;
2753 sc
->need_media_probe
= 0;
2755 /* if we've already set a media type, we're done */
2756 if (sc
->media_flags
!= (IFM_ETHER
| IFM_AUTO
))
2760 * parse the product code to deterimine the interface type
2761 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
2762 * after the 3rd dash in the driver's cached copy of the
2763 * EEPROM's product code string.
2765 ptr
= sc
->product_code_string
;
2767 device_printf(sc
->dev
, "Missing product code\n");
2770 for (i
= 0; i
< 3; i
++, ptr
++) {
2771 ptr
= index(ptr
, '-');
2773 device_printf(sc
->dev
,
2774 "only %d dashes in PC?!?\n", i
);
2780 mxge_set_media(sc
, IFM_10G_CX4
);
2783 else if (*ptr
== 'Q') {
2784 /* -Q is Quad Ribbon Fiber */
2785 device_printf(sc
->dev
, "Quad Ribbon Fiber Media\n");
2786 /* FreeBSD has no media type for Quad ribbon fiber */
2792 mxge_media_types
= mxge_xfp_media_types
;
2793 mxge_media_type_entries
=
2794 sizeof (mxge_xfp_media_types
) /
2795 sizeof (mxge_xfp_media_types
[0]);
2796 byte
= MXGE_XFP_COMPLIANCE_BYTE
;
2800 if (*ptr
== 'S' || *(ptr
+1) == 'S') {
2801 /* -S or -2S is SFP+ */
2802 mxge_media_types
= mxge_sfp_media_types
;
2803 mxge_media_type_entries
=
2804 sizeof (mxge_sfp_media_types
) /
2805 sizeof (mxge_sfp_media_types
[0]);
2810 if (mxge_media_types
== NULL
) {
2811 device_printf(sc
->dev
, "Unknown media type: %c\n", *ptr
);
2816 * At this point we know the NIC has an XFP cage, so now we
2817 * try to determine what is in the cage by using the
2818 * firmware's XFP I2C commands to read the XFP 10GbE compilance
2819 * register. We read just one byte, which may take over
2823 cmd
.data0
= 0; /* just fetch 1 byte, not all 256 */
2825 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_READ
, &cmd
);
2826 if (err
== MXGEFW_CMD_ERROR_I2C_FAILURE
) {
2827 device_printf(sc
->dev
, "failed to read XFP\n");
2829 if (err
== MXGEFW_CMD_ERROR_I2C_ABSENT
) {
2830 device_printf(sc
->dev
, "Type R/S with no XFP!?!?\n");
2832 if (err
!= MXGEFW_CMD_OK
) {
2836 /* now we wait for the data to be cached */
2838 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_BYTE
, &cmd
);
2839 for (ms
= 0; (err
== EBUSY
) && (ms
< 50); ms
++) {
2842 err
= mxge_send_cmd(sc
, MXGEFW_CMD_I2C_BYTE
, &cmd
);
2844 if (err
!= MXGEFW_CMD_OK
) {
2845 device_printf(sc
->dev
, "failed to read %s (%d, %dms)\n",
2846 cage_type
, err
, ms
);
2850 if (cmd
.data0
== mxge_media_types
[0].bitmask
) {
2852 device_printf(sc
->dev
, "%s:%s\n", cage_type
,
2853 mxge_media_types
[0].name
);
2854 mxge_set_media(sc
, IFM_10G_CX4
);
2857 for (i
= 1; i
< mxge_media_type_entries
; i
++) {
2858 if (cmd
.data0
& mxge_media_types
[i
].bitmask
) {
2860 device_printf(sc
->dev
, "%s:%s\n",
2862 mxge_media_types
[i
].name
);
2864 mxge_set_media(sc
, mxge_media_types
[i
].flag
);
2868 device_printf(sc
->dev
, "%s media 0x%x unknown\n", cage_type
,
2875 mxge_intr(void *arg
)
2877 struct mxge_slice_state
*ss
= arg
;
2878 mxge_softc_t
*sc
= ss
->sc
;
2879 mcp_irq_data_t
*stats
= ss
->fw_stats
;
2880 mxge_tx_ring_t
*tx
= &ss
->tx
;
2881 mxge_rx_done_t
*rx_done
= &ss
->rx_done
;
2882 uint32_t send_done_count
;
2886 #ifndef IFNET_BUF_RING
2887 /* an interrupt on a non-zero slice is implicitly valid
2888 since MSI-X irqs are not shared */
2890 mxge_clean_rx_done(ss
);
2891 *ss
->irq_claim
= be32toh(3);
2896 /* make sure the DMA has finished */
2897 if (!stats
->valid
) {
2900 valid
= stats
->valid
;
2902 if (sc
->legacy_irq
) {
2903 /* lower legacy IRQ */
2904 *sc
->irq_deassert
= 0;
2905 if (!mxge_deassert_wait
)
2906 /* don't wait for conf. that irq is low */
2912 /* loop while waiting for legacy irq deassertion */
2914 /* check for transmit completes and receives */
2915 send_done_count
= be32toh(stats
->send_done_count
);
2916 while ((send_done_count
!= tx
->pkt_done
) ||
2917 (rx_done
->entry
[rx_done
->idx
].length
!= 0)) {
2918 if (send_done_count
!= tx
->pkt_done
)
2919 mxge_tx_done(ss
, (int)send_done_count
);
2920 mxge_clean_rx_done(ss
);
2921 send_done_count
= be32toh(stats
->send_done_count
);
2923 if (sc
->legacy_irq
&& mxge_deassert_wait
)
2925 } while (*((volatile uint8_t *) &stats
->valid
));
2927 /* fw link & error stats meaningful only on the first slice */
2928 if (__predict_false((ss
== sc
->ss
) && stats
->stats_updated
)) {
2929 if (sc
->link_state
!= stats
->link_up
) {
2930 sc
->link_state
= stats
->link_up
;
2931 if (sc
->link_state
) {
2932 sc
->ifp
->if_link_state
= LINK_STATE_UP
;
2933 if_link_state_change(sc
->ifp
);
2935 device_printf(sc
->dev
, "link up\n");
2937 sc
->ifp
->if_link_state
= LINK_STATE_DOWN
;
2938 if_link_state_change(sc
->ifp
);
2940 device_printf(sc
->dev
, "link down\n");
2942 sc
->need_media_probe
= 1;
2944 if (sc
->rdma_tags_available
!=
2945 be32toh(stats
->rdma_tags_available
)) {
2946 sc
->rdma_tags_available
=
2947 be32toh(stats
->rdma_tags_available
);
2948 device_printf(sc
->dev
, "RDMA timed out! %d tags "
2949 "left\n", sc
->rdma_tags_available
);
2952 if (stats
->link_down
) {
2953 sc
->down_cnt
+= stats
->link_down
;
2955 if_link_state_change(sc
->ifp
, LINK_STATE_DOWN
);
2959 /* check to see if we have rx token to pass back */
2961 *ss
->irq_claim
= be32toh(3);
2962 *(ss
->irq_claim
+ 1) = be32toh(3);
2966 mxge_init(void *arg
)
2973 mxge_free_slice_mbufs(struct mxge_slice_state
*ss
)
2975 struct lro_entry
*lro_entry
;
2978 while (!SLIST_EMPTY(&ss
->lro_free
)) {
2979 lro_entry
= SLIST_FIRST(&ss
->lro_free
);
2980 SLIST_REMOVE_HEAD(&ss
->lro_free
, next
);
2981 kfree(lro_entry
, M_DEVBUF
);
2984 for (i
= 0; i
<= ss
->rx_big
.mask
; i
++) {
2985 if (ss
->rx_big
.info
[i
].m
== NULL
)
2987 bus_dmamap_unload(ss
->rx_big
.dmat
,
2988 ss
->rx_big
.info
[i
].map
);
2989 m_freem(ss
->rx_big
.info
[i
].m
);
2990 ss
->rx_big
.info
[i
].m
= NULL
;
2993 for (i
= 0; i
<= ss
->rx_small
.mask
; i
++) {
2994 if (ss
->rx_small
.info
[i
].m
== NULL
)
2996 bus_dmamap_unload(ss
->rx_small
.dmat
,
2997 ss
->rx_small
.info
[i
].map
);
2998 m_freem(ss
->rx_small
.info
[i
].m
);
2999 ss
->rx_small
.info
[i
].m
= NULL
;
3002 /* transmit ring used only on the first slice */
3003 if (ss
->tx
.info
== NULL
)
3006 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
3007 ss
->tx
.info
[i
].flag
= 0;
3008 if (ss
->tx
.info
[i
].m
== NULL
)
3010 bus_dmamap_unload(ss
->tx
.dmat
,
3011 ss
->tx
.info
[i
].map
);
3012 m_freem(ss
->tx
.info
[i
].m
);
3013 ss
->tx
.info
[i
].m
= NULL
;
3018 mxge_free_mbufs(mxge_softc_t
*sc
)
3022 for (slice
= 0; slice
< sc
->num_slices
; slice
++)
3023 mxge_free_slice_mbufs(&sc
->ss
[slice
]);
3027 mxge_free_slice_rings(struct mxge_slice_state
*ss
)
3032 if (ss
->rx_done
.entry
!= NULL
)
3033 mxge_dma_free(&ss
->rx_done
.dma
);
3034 ss
->rx_done
.entry
= NULL
;
3036 if (ss
->tx
.req_bytes
!= NULL
)
3037 kfree(ss
->tx
.req_bytes
, M_DEVBUF
);
3038 ss
->tx
.req_bytes
= NULL
;
3040 if (ss
->tx
.seg_list
!= NULL
)
3041 kfree(ss
->tx
.seg_list
, M_DEVBUF
);
3042 ss
->tx
.seg_list
= NULL
;
3044 if (ss
->rx_small
.shadow
!= NULL
)
3045 kfree(ss
->rx_small
.shadow
, M_DEVBUF
);
3046 ss
->rx_small
.shadow
= NULL
;
3048 if (ss
->rx_big
.shadow
!= NULL
)
3049 kfree(ss
->rx_big
.shadow
, M_DEVBUF
);
3050 ss
->rx_big
.shadow
= NULL
;
3052 if (ss
->tx
.info
!= NULL
) {
3053 if (ss
->tx
.dmat
!= NULL
) {
3054 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
3055 bus_dmamap_destroy(ss
->tx
.dmat
,
3056 ss
->tx
.info
[i
].map
);
3058 bus_dma_tag_destroy(ss
->tx
.dmat
);
3060 kfree(ss
->tx
.info
, M_DEVBUF
);
3064 if (ss
->rx_small
.info
!= NULL
) {
3065 if (ss
->rx_small
.dmat
!= NULL
) {
3066 for (i
= 0; i
<= ss
->rx_small
.mask
; i
++) {
3067 bus_dmamap_destroy(ss
->rx_small
.dmat
,
3068 ss
->rx_small
.info
[i
].map
);
3070 bus_dmamap_destroy(ss
->rx_small
.dmat
,
3071 ss
->rx_small
.extra_map
);
3072 bus_dma_tag_destroy(ss
->rx_small
.dmat
);
3074 kfree(ss
->rx_small
.info
, M_DEVBUF
);
3076 ss
->rx_small
.info
= NULL
;
3078 if (ss
->rx_big
.info
!= NULL
) {
3079 if (ss
->rx_big
.dmat
!= NULL
) {
3080 for (i
= 0; i
<= ss
->rx_big
.mask
; i
++) {
3081 bus_dmamap_destroy(ss
->rx_big
.dmat
,
3082 ss
->rx_big
.info
[i
].map
);
3084 bus_dmamap_destroy(ss
->rx_big
.dmat
,
3085 ss
->rx_big
.extra_map
);
3086 bus_dma_tag_destroy(ss
->rx_big
.dmat
);
3088 kfree(ss
->rx_big
.info
, M_DEVBUF
);
3090 ss
->rx_big
.info
= NULL
;
3094 mxge_free_rings(mxge_softc_t
*sc
)
3098 for (slice
= 0; slice
< sc
->num_slices
; slice
++)
3099 mxge_free_slice_rings(&sc
->ss
[slice
]);
3103 mxge_alloc_slice_rings(struct mxge_slice_state
*ss
, int rx_ring_entries
,
3104 int tx_ring_entries
)
3106 mxge_softc_t
*sc
= ss
->sc
;
3112 /* allocate per-slice receive resources */
3114 ss
->rx_small
.mask
= ss
->rx_big
.mask
= rx_ring_entries
- 1;
3115 ss
->rx_done
.mask
= (2 * rx_ring_entries
) - 1;
3117 /* allocate the rx shadow rings */
3118 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.shadow
);
3119 ss
->rx_small
.shadow
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3120 if (ss
->rx_small
.shadow
== NULL
)
3123 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.shadow
);
3124 ss
->rx_big
.shadow
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3125 if (ss
->rx_big
.shadow
== NULL
)
3128 /* allocate the rx host info rings */
3129 bytes
= rx_ring_entries
* sizeof (*ss
->rx_small
.info
);
3130 ss
->rx_small
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3131 if (ss
->rx_small
.info
== NULL
)
3134 bytes
= rx_ring_entries
* sizeof (*ss
->rx_big
.info
);
3135 ss
->rx_big
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3136 if (ss
->rx_big
.info
== NULL
)
3139 /* allocate the rx busdma resources */
3140 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3142 4096, /* boundary */
3143 BUS_SPACE_MAXADDR
, /* low */
3144 BUS_SPACE_MAXADDR
, /* high */
3145 NULL
, NULL
, /* filter */
3146 MHLEN
, /* maxsize */
3148 MHLEN
, /* maxsegsize */
3149 BUS_DMA_ALLOCNOW
, /* flags */
3150 NULL
, NULL
, /* lock */
3151 &ss
->rx_small
.dmat
); /* tag */
3153 device_printf(sc
->dev
, "Err %d allocating rx_small dmat\n",
3158 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3160 #if MXGE_VIRT_JUMBOS
3161 4096, /* boundary */
3165 BUS_SPACE_MAXADDR
, /* low */
3166 BUS_SPACE_MAXADDR
, /* high */
3167 NULL
, NULL
, /* filter */
3168 3*4096, /* maxsize */
3169 #if MXGE_VIRT_JUMBOS
3171 4096, /* maxsegsize*/
3174 MJUM9BYTES
, /* maxsegsize*/
3176 BUS_DMA_ALLOCNOW
, /* flags */
3177 NULL
, NULL
, /* lock */
3178 &ss
->rx_big
.dmat
); /* tag */
3180 device_printf(sc
->dev
, "Err %d allocating rx_big dmat\n",
3184 for (i
= 0; i
<= ss
->rx_small
.mask
; i
++) {
3185 err
= bus_dmamap_create(ss
->rx_small
.dmat
, 0,
3186 &ss
->rx_small
.info
[i
].map
);
3188 device_printf(sc
->dev
, "Err %d rx_small dmamap\n",
3193 err
= bus_dmamap_create(ss
->rx_small
.dmat
, 0,
3194 &ss
->rx_small
.extra_map
);
3196 device_printf(sc
->dev
, "Err %d extra rx_small dmamap\n",
3201 for (i
= 0; i
<= ss
->rx_big
.mask
; i
++) {
3202 err
= bus_dmamap_create(ss
->rx_big
.dmat
, 0,
3203 &ss
->rx_big
.info
[i
].map
);
3205 device_printf(sc
->dev
, "Err %d rx_big dmamap\n",
3210 err
= bus_dmamap_create(ss
->rx_big
.dmat
, 0,
3211 &ss
->rx_big
.extra_map
);
3213 device_printf(sc
->dev
, "Err %d extra rx_big dmamap\n",
3218 /* now allocate TX resouces */
3220 #ifndef IFNET_BUF_RING
3221 /* only use a single TX ring for now */
3222 if (ss
!= ss
->sc
->ss
)
3226 ss
->tx
.mask
= tx_ring_entries
- 1;
3227 ss
->tx
.max_desc
= MIN(MXGE_MAX_SEND_DESC
, tx_ring_entries
/ 4);
3230 /* allocate the tx request copy block */
3232 sizeof (*ss
->tx
.req_list
) * (ss
->tx
.max_desc
+ 4);
3233 ss
->tx
.req_bytes
= kmalloc(bytes
, M_DEVBUF
, M_WAITOK
);
3234 if (ss
->tx
.req_bytes
== NULL
)
3236 /* ensure req_list entries are aligned to 8 bytes */
3237 ss
->tx
.req_list
= (mcp_kreq_ether_send_t
*)
3238 ((unsigned long)(ss
->tx
.req_bytes
+ 7) & ~7UL);
3240 /* allocate the tx busdma segment list */
3241 bytes
= sizeof (*ss
->tx
.seg_list
) * ss
->tx
.max_desc
;
3242 ss
->tx
.seg_list
= (bus_dma_segment_t
*)
3243 kmalloc(bytes
, M_DEVBUF
, M_WAITOK
);
3244 if (ss
->tx
.seg_list
== NULL
)
3247 /* allocate the tx host info ring */
3248 bytes
= tx_ring_entries
* sizeof (*ss
->tx
.info
);
3249 ss
->tx
.info
= kmalloc(bytes
, M_DEVBUF
, M_ZERO
|M_WAITOK
);
3250 if (ss
->tx
.info
== NULL
)
3253 /* allocate the tx busdma resources */
3254 err
= bus_dma_tag_create(sc
->parent_dmat
, /* parent */
3256 sc
->tx_boundary
, /* boundary */
3257 BUS_SPACE_MAXADDR
, /* low */
3258 BUS_SPACE_MAXADDR
, /* high */
3259 NULL
, NULL
, /* filter */
3260 65536 + 256, /* maxsize */
3261 ss
->tx
.max_desc
- 2, /* num segs */
3262 sc
->tx_boundary
, /* maxsegsz */
3263 BUS_DMA_ALLOCNOW
, /* flags */
3264 NULL
, NULL
, /* lock */
3265 &ss
->tx
.dmat
); /* tag */
3268 device_printf(sc
->dev
, "Err %d allocating tx dmat\n",
3273 /* now use these tags to setup dmamaps for each slot
3275 for (i
= 0; i
<= ss
->tx
.mask
; i
++) {
3276 err
= bus_dmamap_create(ss
->tx
.dmat
, 0,
3277 &ss
->tx
.info
[i
].map
);
3279 device_printf(sc
->dev
, "Err %d tx dmamap\n",
3289 mxge_alloc_rings(mxge_softc_t
*sc
)
3293 int tx_ring_entries
, rx_ring_entries
;
3296 /* get ring sizes */
3297 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_SEND_RING_SIZE
, &cmd
);
3298 tx_ring_size
= cmd
.data0
;
3300 device_printf(sc
->dev
, "Cannot determine tx ring sizes\n");
3304 tx_ring_entries
= tx_ring_size
/ sizeof (mcp_kreq_ether_send_t
);
3305 rx_ring_entries
= sc
->rx_ring_size
/ sizeof (mcp_dma_addr_t
);
3306 IFQ_SET_MAXLEN(&sc
->ifp
->if_snd
, tx_ring_entries
- 1);
3307 sc
->ifp
->if_snd
.ifq_drv_maxlen
= sc
->ifp
->if_snd
.ifq_maxlen
;
3308 IFQ_SET_READY(&sc
->ifp
->if_snd
);
3310 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3311 err
= mxge_alloc_slice_rings(&sc
->ss
[slice
],
3320 mxge_free_rings(sc
);
3327 mxge_choose_params(int mtu
, int *big_buf_size
, int *cl_size
, int *nbufs
)
3329 int bufsize
= mtu
+ ETHER_HDR_LEN
+ ETHER_VLAN_ENCAP_LEN
+ MXGEFW_PAD
;
3331 if (bufsize
< MCLBYTES
) {
3332 /* easy, everything fits in a single buffer */
3333 *big_buf_size
= MCLBYTES
;
3334 *cl_size
= MCLBYTES
;
3339 if (bufsize
< MJUMPAGESIZE
) {
3340 /* still easy, everything still fits in a single buffer */
3341 *big_buf_size
= MJUMPAGESIZE
;
3342 *cl_size
= MJUMPAGESIZE
;
3346 #if MXGE_VIRT_JUMBOS
3347 /* now we need to use virtually contiguous buffers */
3348 *cl_size
= MJUM9BYTES
;
3349 *big_buf_size
= 4096;
3350 *nbufs
= mtu
/ 4096 + 1;
3351 /* needs to be a power of two, so round up */
3355 *cl_size
= MJUM9BYTES
;
3356 *big_buf_size
= MJUM9BYTES
;
3362 mxge_slice_open(struct mxge_slice_state
*ss
, int nbufs
, int cl_size
)
3367 struct lro_entry
*lro_entry
;
3372 slice
= ss
- sc
->ss
;
3374 SLIST_INIT(&ss
->lro_free
);
3375 SLIST_INIT(&ss
->lro_active
);
3377 for (i
= 0; i
< sc
->lro_cnt
; i
++) {
3378 lro_entry
= (struct lro_entry
*)
3379 kmalloc(sizeof (*lro_entry
), M_DEVBUF
,
3381 if (lro_entry
== NULL
) {
3385 SLIST_INSERT_HEAD(&ss
->lro_free
, lro_entry
, next
);
3387 /* get the lanai pointers to the send and receive rings */
3390 #ifndef IFNET_BUF_RING
3391 /* We currently only send from the first slice */
3395 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_SEND_OFFSET
, &cmd
);
3397 (volatile mcp_kreq_ether_send_t
*)(sc
->sram
+ cmd
.data0
);
3398 ss
->tx
.send_go
= (volatile uint32_t *)
3399 (sc
->sram
+ MXGEFW_ETH_SEND_GO
+ 64 * slice
);
3400 ss
->tx
.send_stop
= (volatile uint32_t *)
3401 (sc
->sram
+ MXGEFW_ETH_SEND_STOP
+ 64 * slice
);
3402 #ifndef IFNET_BUF_RING
3406 err
|= mxge_send_cmd(sc
,
3407 MXGEFW_CMD_GET_SMALL_RX_OFFSET
, &cmd
);
3408 ss
->rx_small
.lanai
=
3409 (volatile mcp_kreq_ether_recv_t
*)(sc
->sram
+ cmd
.data0
);
3411 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_BIG_RX_OFFSET
, &cmd
);
3413 (volatile mcp_kreq_ether_recv_t
*)(sc
->sram
+ cmd
.data0
);
3416 device_printf(sc
->dev
,
3417 "failed to get ring sizes or locations\n");
3421 /* stock receive rings */
3422 for (i
= 0; i
<= ss
->rx_small
.mask
; i
++) {
3423 map
= ss
->rx_small
.info
[i
].map
;
3424 err
= mxge_get_buf_small(ss
, map
, i
);
3426 device_printf(sc
->dev
, "alloced %d/%d smalls\n",
3427 i
, ss
->rx_small
.mask
+ 1);
3431 for (i
= 0; i
<= ss
->rx_big
.mask
; i
++) {
3432 ss
->rx_big
.shadow
[i
].addr_low
= 0xffffffff;
3433 ss
->rx_big
.shadow
[i
].addr_high
= 0xffffffff;
3435 ss
->rx_big
.nbufs
= nbufs
;
3436 ss
->rx_big
.cl_size
= cl_size
;
3437 ss
->rx_big
.mlen
= ss
->sc
->ifp
->if_mtu
+ ETHER_HDR_LEN
+
3438 ETHER_VLAN_ENCAP_LEN
+ MXGEFW_PAD
;
3439 for (i
= 0; i
<= ss
->rx_big
.mask
; i
+= ss
->rx_big
.nbufs
) {
3440 map
= ss
->rx_big
.info
[i
].map
;
3441 err
= mxge_get_buf_big(ss
, map
, i
);
3443 device_printf(sc
->dev
, "alloced %d/%d bigs\n",
3444 i
, ss
->rx_big
.mask
+ 1);
3452 mxge_open(mxge_softc_t
*sc
)
3455 int err
, big_bytes
, nbufs
, slice
, cl_size
, i
;
3457 volatile uint8_t *itable
;
3458 struct mxge_slice_state
*ss
;
3460 /* Copy the MAC address in case it was overridden */
3461 bcopy(IF_LLADDR(sc
->ifp
), sc
->mac_addr
, ETHER_ADDR_LEN
);
3463 err
= mxge_reset(sc
, 1);
3465 device_printf(sc
->dev
, "failed to reset\n");
3469 if (sc
->num_slices
> 1) {
3470 /* setup the indirection table */
3471 cmd
.data0
= sc
->num_slices
;
3472 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_RSS_TABLE_SIZE
,
3475 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RSS_TABLE_OFFSET
,
3478 device_printf(sc
->dev
,
3479 "failed to setup rss tables\n");
3483 /* just enable an identity mapping */
3484 itable
= sc
->sram
+ cmd
.data0
;
3485 for (i
= 0; i
< sc
->num_slices
; i
++)
3486 itable
[i
] = (uint8_t)i
;
3489 cmd
.data1
= mxge_rss_hash_type
;
3490 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_RSS_ENABLE
, &cmd
);
3492 device_printf(sc
->dev
, "failed to enable slices\n");
3498 mxge_choose_params(sc
->ifp
->if_mtu
, &big_bytes
, &cl_size
, &nbufs
);
3501 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS
,
3503 /* error is only meaningful if we're trying to set
3504 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
3505 if (err
&& nbufs
> 1) {
3506 device_printf(sc
->dev
,
3507 "Failed to set alway-use-n to %d\n",
3511 /* Give the firmware the mtu and the big and small buffer
3512 sizes. The firmware wants the big buf size to be a power
3513 of two. Luckily, FreeBSD's clusters are powers of two */
3514 cmd
.data0
= sc
->ifp
->if_mtu
+ ETHER_HDR_LEN
+ ETHER_VLAN_ENCAP_LEN
;
3515 err
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_MTU
, &cmd
);
3516 cmd
.data0
= MHLEN
- MXGEFW_PAD
;
3517 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE
,
3519 cmd
.data0
= big_bytes
;
3520 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_BIG_BUFFER_SIZE
, &cmd
);
3523 device_printf(sc
->dev
, "failed to setup params\n");
3527 /* Now give him the pointer to the stats block */
3529 #ifdef IFNET_BUF_RING
3530 slice
< sc
->num_slices
;
3535 ss
= &sc
->ss
[slice
];
3537 MXGE_LOWPART_TO_U32(ss
->fw_stats_dma
.bus_addr
);
3539 MXGE_HIGHPART_TO_U32(ss
->fw_stats_dma
.bus_addr
);
3540 cmd
.data2
= sizeof(struct mcp_irq_data
);
3541 cmd
.data2
|= (slice
<< 16);
3542 err
|= mxge_send_cmd(sc
, MXGEFW_CMD_SET_STATS_DMA_V2
, &cmd
);
3546 bus
= sc
->ss
->fw_stats_dma
.bus_addr
;
3547 bus
+= offsetof(struct mcp_irq_data
, send_done_count
);
3548 cmd
.data0
= MXGE_LOWPART_TO_U32(bus
);
3549 cmd
.data1
= MXGE_HIGHPART_TO_U32(bus
);
3550 err
= mxge_send_cmd(sc
,
3551 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE
,
3553 /* Firmware cannot support multicast without STATS_DMA_V2 */
3554 sc
->fw_multicast_support
= 0;
3556 sc
->fw_multicast_support
= 1;
3560 device_printf(sc
->dev
, "failed to setup params\n");
3564 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3565 err
= mxge_slice_open(&sc
->ss
[slice
], nbufs
, cl_size
);
3567 device_printf(sc
->dev
, "couldn't open slice %d\n",
3573 /* Finally, start the firmware running */
3574 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ETHERNET_UP
, &cmd
);
3576 device_printf(sc
->dev
, "Couldn't bring up link\n");
3579 #ifdef IFNET_BUF_RING
3580 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3581 ss
= &sc
->ss
[slice
];
3582 ss
->if_drv_flags
|= IFF_DRV_RUNNING
;
3583 ss
->if_drv_flags
&= ~IFF_DRV_OACTIVE
;
3586 sc
->ifp
->if_drv_flags
|= IFF_DRV_RUNNING
;
3587 sc
->ifp
->if_drv_flags
&= ~IFF_DRV_OACTIVE
;
3588 callout_reset(&sc
->co_hdl
, mxge_ticks
, mxge_tick
, sc
);
3594 mxge_free_mbufs(sc
);
3600 mxge_close(mxge_softc_t
*sc
)
3603 int err
, old_down_cnt
;
3604 #ifdef IFNET_BUF_RING
3605 struct mxge_slice_state
*ss
;
3609 callout_stop(&sc
->co_hdl
);
3610 #ifdef IFNET_BUF_RING
3611 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3612 ss
= &sc
->ss
[slice
];
3613 ss
->if_drv_flags
&= ~IFF_DRV_RUNNING
;
3616 sc
->ifp
->if_drv_flags
&= ~IFF_DRV_RUNNING
;
3617 old_down_cnt
= sc
->down_cnt
;
3619 err
= mxge_send_cmd(sc
, MXGEFW_CMD_ETHERNET_DOWN
, &cmd
);
3621 device_printf(sc
->dev
, "Couldn't bring down link\n");
3623 if (old_down_cnt
== sc
->down_cnt
) {
3624 /* wait for down irq */
3625 DELAY(10 * sc
->intr_coal_delay
);
3628 if (old_down_cnt
== sc
->down_cnt
) {
3629 device_printf(sc
->dev
, "never got down irq\n");
3632 mxge_free_mbufs(sc
);
3638 mxge_setup_cfg_space(mxge_softc_t
*sc
)
3640 device_t dev
= sc
->dev
;
3642 uint16_t cmd
, lnk
, pectl
;
3644 /* find the PCIe link width and set max read request to 4KB*/
3645 if (pci_find_extcap(dev
, PCIY_EXPRESS
, ®
) == 0) {
3646 lnk
= pci_read_config(dev
, reg
+ 0x12, 2);
3647 sc
->link_width
= (lnk
>> 4) & 0x3f;
3649 pectl
= pci_read_config(dev
, reg
+ 0x8, 2);
3650 pectl
= (pectl
& ~0x7000) | (5 << 12);
3651 pci_write_config(dev
, reg
+ 0x8, pectl
, 2);
3654 /* Enable DMA and Memory space access */
3655 pci_enable_busmaster(dev
);
3656 cmd
= pci_read_config(dev
, PCIR_COMMAND
, 2);
3657 cmd
|= PCIM_CMD_MEMEN
;
3658 pci_write_config(dev
, PCIR_COMMAND
, cmd
, 2);
3662 mxge_read_reboot(mxge_softc_t
*sc
)
3664 device_t dev
= sc
->dev
;
3667 /* find the vendor specific offset */
3668 if (pci_find_extcap(dev
, PCIY_VENDOR
, &vs
) != 0) {
3669 device_printf(sc
->dev
,
3670 "could not find vendor specific offset\n");
3671 return (uint32_t)-1;
3673 /* enable read32 mode */
3674 pci_write_config(dev
, vs
+ 0x10, 0x3, 1);
3675 /* tell NIC which register to read */
3676 pci_write_config(dev
, vs
+ 0x18, 0xfffffff0, 4);
3677 return (pci_read_config(dev
, vs
+ 0x14, 4));
3681 mxge_watchdog_reset(mxge_softc_t
*sc
, int slice
)
3683 struct pci_devinfo
*dinfo
;
3691 device_printf(sc
->dev
, "Watchdog reset!\n");
3694 * check to see if the NIC rebooted. If it did, then all of
3695 * PCI config space has been reset, and things like the
3696 * busmaster bit will be zero. If this is the case, then we
3697 * must restore PCI config space before the NIC can be used
3700 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3701 if (cmd
== 0xffff) {
3703 * maybe the watchdog caught the NIC rebooting; wait
3704 * up to 100ms for it to finish. If it does not come
3705 * back, then give up
3708 cmd
= pci_read_config(sc
->dev
, PCIR_COMMAND
, 2);
3709 if (cmd
== 0xffff) {
3710 device_printf(sc
->dev
, "NIC disappeared!\n");
3714 if ((cmd
& PCIM_CMD_BUSMASTEREN
) == 0) {
3715 /* print the reboot status */
3716 reboot
= mxge_read_reboot(sc
);
3717 device_printf(sc
->dev
, "NIC rebooted, status = 0x%x\n",
3719 /* restore PCI configuration space */
3720 dinfo
= device_get_ivars(sc
->dev
);
3721 pci_cfg_restore(sc
->dev
, dinfo
);
3723 /* and redo any changes we made to our config space */
3724 mxge_setup_cfg_space(sc
);
3726 if (sc
->ifp
->if_drv_flags
& IFF_DRV_RUNNING
) {
3728 err
= mxge_open(sc
);
3731 tx
= &sc
->ss
[slice
].tx
;
3732 device_printf(sc
->dev
,
3733 "NIC did not reboot, slice %d ring state:\n",
3735 device_printf(sc
->dev
,
3736 "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
3737 tx
->req
, tx
->done
, tx
->queue_active
);
3738 device_printf(sc
->dev
, "tx.activate=%d tx.deactivate=%d\n",
3739 tx
->activate
, tx
->deactivate
);
3740 device_printf(sc
->dev
, "pkt_done=%d fw=%d\n",
3742 be32toh(sc
->ss
->fw_stats
->send_done_count
));
3743 device_printf(sc
->dev
, "not resetting\n");
3749 mxge_watchdog(mxge_softc_t
*sc
)
3752 uint32_t rx_pause
= be32toh(sc
->ss
->fw_stats
->dropped_pause
);
3755 /* see if we have outstanding transmits, which
3756 have been pending for more than mxge_ticks */
3758 #ifdef IFNET_BUF_RING
3759 (i
< sc
->num_slices
) && (err
== 0);
3761 (i
< 1) && (err
== 0);
3765 if (tx
->req
!= tx
->done
&&
3766 tx
->watchdog_req
!= tx
->watchdog_done
&&
3767 tx
->done
== tx
->watchdog_done
) {
3768 /* check for pause blocking before resetting */
3769 if (tx
->watchdog_rx_pause
== rx_pause
)
3770 err
= mxge_watchdog_reset(sc
, i
);
3772 device_printf(sc
->dev
, "Flow control blocking "
3773 "xmits, check link partner\n");
3776 tx
->watchdog_req
= tx
->req
;
3777 tx
->watchdog_done
= tx
->done
;
3778 tx
->watchdog_rx_pause
= rx_pause
;
3781 if (sc
->need_media_probe
)
3782 mxge_media_probe(sc
);
3787 mxge_update_stats(mxge_softc_t
*sc
)
3789 struct mxge_slice_state
*ss
;
3790 u_long ipackets
= 0;
3791 u_long opackets
= 0;
3792 #ifdef IFNET_BUF_RING
3800 for (slice
= 0; slice
< sc
->num_slices
; slice
++) {
3801 ss
= &sc
->ss
[slice
];
3802 ipackets
+= ss
->ipackets
;
3803 opackets
+= ss
->opackets
;
3804 #ifdef IFNET_BUF_RING
3805 obytes
+= ss
->obytes
;
3806 omcasts
+= ss
->omcasts
;
3807 odrops
+= ss
->tx
.br
->br_drops
;
3809 oerrors
+= ss
->oerrors
;
3811 sc
->ifp
->if_ipackets
= ipackets
;
3812 sc
->ifp
->if_opackets
= opackets
;
3813 #ifdef IFNET_BUF_RING
3814 sc
->ifp
->if_obytes
= obytes
;
3815 sc
->ifp
->if_omcasts
= omcasts
;
3816 sc
->ifp
->if_snd
.ifq_drops
= odrops
;
3818 sc
->ifp
->if_oerrors
= oerrors
;
3822 mxge_tick(void *arg
)
3824 mxge_softc_t
*sc
= arg
;
3827 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
3828 /* aggregate stats from different slices */
3829 mxge_update_stats(sc
);
3830 if (!sc
->watchdog_countdown
) {
3831 err
= mxge_watchdog(sc
);
3832 sc
->watchdog_countdown
= 4;
3834 sc
->watchdog_countdown
--;
3836 callout_reset(&sc
->co_hdl
, mxge_ticks
, mxge_tick
, sc
);
3837 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
3841 mxge_media_change(struct ifnet
*ifp
)
3847 mxge_change_mtu(mxge_softc_t
*sc
, int mtu
)
3849 struct ifnet
*ifp
= sc
->ifp
;
3850 int real_mtu
, old_mtu
;
3854 real_mtu
= mtu
+ ETHER_HDR_LEN
+ ETHER_VLAN_ENCAP_LEN
;
3855 if ((real_mtu
> sc
->max_mtu
) || real_mtu
< 60)
3857 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
3858 old_mtu
= ifp
->if_mtu
;
3860 if (ifp
->if_drv_flags
& IFF_DRV_RUNNING
) {
3862 err
= mxge_open(sc
);
3864 ifp
->if_mtu
= old_mtu
;
3866 (void) mxge_open(sc
);
3869 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
3874 mxge_media_status(struct ifnet
*ifp
, struct ifmediareq
*ifmr
)
3876 mxge_softc_t
*sc
= ifp
->if_softc
;
3881 ifmr
->ifm_status
= IFM_AVALID
;
3882 ifmr
->ifm_status
|= sc
->link_state
? IFM_ACTIVE
: 0;
3883 ifmr
->ifm_active
= IFM_AUTO
| IFM_ETHER
;
3884 ifmr
->ifm_active
|= sc
->link_state
? IFM_FDX
: 0;
3888 mxge_ioctl(struct ifnet
*ifp
, u_long command
, caddr_t data
)
3890 mxge_softc_t
*sc
= ifp
->if_softc
;
3891 struct ifreq
*ifr
= (struct ifreq
*)data
;
3898 err
= ether_ioctl(ifp
, command
, data
);
3902 err
= mxge_change_mtu(sc
, ifr
->ifr_mtu
);
3906 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
3908 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
3911 if (ifp
->if_flags
& IFF_UP
) {
3912 if (!(ifp
->if_drv_flags
& IFF_DRV_RUNNING
)) {
3913 err
= mxge_open(sc
);
3915 /* take care of promis can allmulti
3917 mxge_change_promisc(sc
,
3918 ifp
->if_flags
& IFF_PROMISC
);
3919 mxge_set_multicast_list(sc
);
3922 if (ifp
->if_drv_flags
& IFF_DRV_RUNNING
) {
3926 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
3931 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
3932 mxge_set_multicast_list(sc
);
3933 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
3937 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
3938 mask
= ifr
->ifr_reqcap
^ ifp
->if_capenable
;
3939 if (mask
& IFCAP_TXCSUM
) {
3940 if (IFCAP_TXCSUM
& ifp
->if_capenable
) {
3941 ifp
->if_capenable
&= ~(IFCAP_TXCSUM
|IFCAP_TSO4
);
3942 ifp
->if_hwassist
&= ~(CSUM_TCP
| CSUM_UDP
3945 ifp
->if_capenable
|= IFCAP_TXCSUM
;
3946 ifp
->if_hwassist
|= (CSUM_TCP
| CSUM_UDP
);
3948 } else if (mask
& IFCAP_RXCSUM
) {
3949 if (IFCAP_RXCSUM
& ifp
->if_capenable
) {
3950 ifp
->if_capenable
&= ~IFCAP_RXCSUM
;
3953 ifp
->if_capenable
|= IFCAP_RXCSUM
;
3957 if (mask
& IFCAP_TSO4
) {
3958 if (IFCAP_TSO4
& ifp
->if_capenable
) {
3959 ifp
->if_capenable
&= ~IFCAP_TSO4
;
3960 ifp
->if_hwassist
&= ~CSUM_TSO
;
3961 } else if (IFCAP_TXCSUM
& ifp
->if_capenable
) {
3962 ifp
->if_capenable
|= IFCAP_TSO4
;
3963 ifp
->if_hwassist
|= CSUM_TSO
;
3965 printf("mxge requires tx checksum offload"
3966 " be enabled to use TSO\n");
3970 if (mask
& IFCAP_LRO
) {
3971 if (IFCAP_LRO
& ifp
->if_capenable
)
3972 err
= mxge_change_lro_locked(sc
, 0);
3974 err
= mxge_change_lro_locked(sc
, mxge_lro_cnt
);
3976 if (mask
& IFCAP_VLAN_HWTAGGING
)
3977 ifp
->if_capenable
^= IFCAP_VLAN_HWTAGGING
;
3978 lockmgr(&sc
->driver_lock
, LK_RELEASE
);
3979 VLAN_CAPABILITIES(ifp
);
3984 err
= ifmedia_ioctl(ifp
, (struct ifreq
*)data
,
3985 &sc
->media
, command
);
3995 mxge_fetch_tunables(mxge_softc_t
*sc
)
3998 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices
);
3999 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
4000 &mxge_flow_control
);
4001 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
4002 &mxge_intr_coal_delay
);
4003 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
4004 &mxge_nvidia_ecrc_enable
);
4005 TUNABLE_INT_FETCH("hw.mxge.force_firmware",
4006 &mxge_force_firmware
);
4007 TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
4008 &mxge_deassert_wait
);
4009 TUNABLE_INT_FETCH("hw.mxge.verbose",
4011 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks
);
4012 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc
->lro_cnt
);
4013 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc
);
4014 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type
);
4015 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu
);
4016 if (sc
->lro_cnt
!= 0)
4017 mxge_lro_cnt
= sc
->lro_cnt
;
4021 if (mxge_intr_coal_delay
< 0 || mxge_intr_coal_delay
> 10*1000)
4022 mxge_intr_coal_delay
= 30;
4023 if (mxge_ticks
== 0)
4024 mxge_ticks
= hz
/ 2;
4025 sc
->pause
= mxge_flow_control
;
4026 if (mxge_rss_hash_type
< MXGEFW_RSS_HASH_TYPE_IPV4
4027 || mxge_rss_hash_type
> MXGEFW_RSS_HASH_TYPE_MAX
) {
4028 mxge_rss_hash_type
= MXGEFW_RSS_HASH_TYPE_SRC_PORT
;
4030 if (mxge_initial_mtu
> ETHERMTU_JUMBO
||
4031 mxge_initial_mtu
< ETHER_MIN_LEN
)
4032 mxge_initial_mtu
= ETHERMTU_JUMBO
;
4037 mxge_free_slices(mxge_softc_t
*sc
)
4039 struct mxge_slice_state
*ss
;
4046 for (i
= 0; i
< sc
->num_slices
; i
++) {
4048 if (ss
->fw_stats
!= NULL
) {
4049 mxge_dma_free(&ss
->fw_stats_dma
);
4050 ss
->fw_stats
= NULL
;
4051 #ifdef IFNET_BUF_RING
4052 if (ss
->tx
.br
!= NULL
) {
4053 drbr_free(ss
->tx
.br
, M_DEVBUF
);
4057 lockuninit(&ss
->tx
.lock
);
4059 if (ss
->rx_done
.entry
!= NULL
) {
4060 mxge_dma_free(&ss
->rx_done
.dma
);
4061 ss
->rx_done
.entry
= NULL
;
4064 free(sc
->ss
, M_DEVBUF
);
4069 mxge_alloc_slices(mxge_softc_t
*sc
)
4072 struct mxge_slice_state
*ss
;
4074 int err
, i
, max_intr_slots
;
4076 err
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
4078 device_printf(sc
->dev
, "Cannot determine rx ring size\n");
4081 sc
->rx_ring_size
= cmd
.data0
;
4082 max_intr_slots
= 2 * (sc
->rx_ring_size
/ sizeof (mcp_dma_addr_t
));
4084 bytes
= sizeof (*sc
->ss
) * sc
->num_slices
;
4085 sc
->ss
= kmalloc(bytes
, M_DEVBUF
, M_NOWAIT
| M_ZERO
);
4088 for (i
= 0; i
< sc
->num_slices
; i
++) {
4093 /* allocate per-slice rx interrupt queues */
4095 bytes
= max_intr_slots
* sizeof (*ss
->rx_done
.entry
);
4096 err
= mxge_dma_alloc(sc
, &ss
->rx_done
.dma
, bytes
, 4096);
4099 ss
->rx_done
.entry
= ss
->rx_done
.dma
.addr
;
4100 bzero(ss
->rx_done
.entry
, bytes
);
4103 * allocate the per-slice firmware stats; stats
4104 * (including tx) are used used only on the first
4107 #ifndef IFNET_BUF_RING
4112 bytes
= sizeof (*ss
->fw_stats
);
4113 err
= mxge_dma_alloc(sc
, &ss
->fw_stats_dma
,
4114 sizeof (*ss
->fw_stats
), 64);
4117 ss
->fw_stats
= (mcp_irq_data_t
*)ss
->fw_stats_dma
.addr
;
4118 snprintf(ss
->tx
.lock_name
, sizeof(ss
->tx
.lock_name
),
4119 "%s:tx(%d)", device_get_nameunit(sc
->dev
), i
);
4120 lock_init(&ss
->tx
.lock
, ss
->tx
.lock_name
, 0, LK_CANRECURSE
);
4121 #ifdef IFNET_BUF_RING
4122 ss
->tx
.br
= buf_ring_alloc(2048, M_DEVBUF
, M_WAITOK
,
4130 mxge_free_slices(sc
);
/*
 * mxge_slice_probe -- decide how many slices (rx queue / interrupt
 * pairs) this attach will use.  Stays single-slice unless
 * multi-slice is enabled (mxge_max_slices tunable), the system is
 * SMP (mp_ncpus >= 2) and MSI-X vectors are available.  Otherwise:
 * load the RSS firmware variant matching the current
 * aligned/unaligned choice, reset the NIC, size the interrupt
 * queues (MXGEFW_CMD_SET_INTRQ_SIZE), query
 * MXGEFW_CMD_GET_MAX_RSS_QUEUES, then clamp sc->num_slices to the
 * MSI-X count, to mp_ncpus when mxge_max_slices == -1 (else to
 * mxge_max_slices), and round down to a power of two.  On any
 * failure the tail restores the previous firmware name and reloads
 * it, falling back to one slice.
 *
 * NOTE(review): lossy extraction -- the status checks / early
 * returns between the mxge_send_cmd calls and the abort label are
 * elided here (original line numbers skip); verify against the
 * full source before editing.
 */
4135 mxge_slice_probe(mxge_softc_t
*sc
)
4139 int msix_cnt
, status
, max_intr_slots
;
4143 * don't enable multiple slices if they are not enabled,
4144 * or if this is not an SMP system
4147 if (mxge_max_slices
== 0 || mxge_max_slices
== 1 || mp_ncpus
< 2)
4150 /* see how many MSI-X interrupts are available */
4151 msix_cnt
= pci_msix_count(sc
->dev
);
4155 /* now load the slice aware firmware see what it supports */
4156 old_fw
= sc
->fw_name
;
4157 if (old_fw
== mxge_fw_aligned
)
4158 sc
->fw_name
= mxge_fw_rss_aligned
;
4160 sc
->fw_name
= mxge_fw_rss_unaligned
;
4161 status
= mxge_load_firmware(sc
, 0);
4163 device_printf(sc
->dev
, "Falling back to a single slice\n");
4167 /* try to send a reset command to the card to see if it
4169 memset(&cmd
, 0, sizeof (cmd
));
4170 status
= mxge_send_cmd(sc
, MXGEFW_CMD_RESET
, &cmd
);
4172 device_printf(sc
->dev
, "failed reset\n");
4176 /* get rx ring size */
4177 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_RX_RING_SIZE
, &cmd
);
4179 device_printf(sc
->dev
, "Cannot determine rx ring size\n");
4182 max_intr_slots
= 2 * (cmd
.data0
/ sizeof (mcp_dma_addr_t
));
4184 /* tell it the size of the interrupt queues */
4185 cmd
.data0
= max_intr_slots
* sizeof (struct mcp_slot
);
4186 status
= mxge_send_cmd(sc
, MXGEFW_CMD_SET_INTRQ_SIZE
, &cmd
);
4188 device_printf(sc
->dev
, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
4192 /* ask the maximum number of slices it supports */
4193 status
= mxge_send_cmd(sc
, MXGEFW_CMD_GET_MAX_RSS_QUEUES
, &cmd
);
4195 device_printf(sc
->dev
,
4196 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
4199 sc
->num_slices
= cmd
.data0
;
4200 if (sc
->num_slices
> msix_cnt
)
4201 sc
->num_slices
= msix_cnt
;
4203 if (mxge_max_slices
== -1) {
4204 /* cap to number of CPUs in system */
4205 if (sc
->num_slices
> mp_ncpus
)
4206 sc
->num_slices
= mp_ncpus
;
4208 if (sc
->num_slices
> mxge_max_slices
)
4209 sc
->num_slices
= mxge_max_slices
;
4211 /* make sure it is a power of two */
4212 while (sc
->num_slices
& (sc
->num_slices
- 1))
4216 device_printf(sc
->dev
, "using %d slices\n",
4222 sc
->fw_name
= old_fw
;
4223 (void) mxge_load_firmware(sc
, 0);
/*
 * mxge_add_msix_irqs -- multi-slice interrupt setup: map the MSI-X
 * table (SYS_RES_MEMORY, released below with PCIR_BAR(2)), allocate
 * sc->num_slices MSI-X messages, then per slice one IRQ resource
 * and one handler (mxge_intr with &sc->ss[i] as its argument,
 * cookie stored in sc->msix_ih[i]); finally print the assigned
 * vectors.  The tail is the usual goto-unwind chain
 * (abort_with_intr / abort_with_res / abort_with_msix /
 * abort_with_msix_table) releasing everything in reverse order.
 *
 * NOTE(review): lossy extraction -- the intermediate labels, the
 * per-slice rid bookkeeping, and several error checks are elided
 * (original line numbers skip); verify against the full source.
 *
 * NOTE(review): the printf format at original lines 4244-4245
 * concatenates "wanted %d" directly with "err = %d \n" with no
 * separator between the adjacent string literals -- the message
 * prints as "wanted %derr = %d".  Cosmetic, but worth fixing
 * upstream.
 */
4227 mxge_add_msix_irqs(mxge_softc_t
*sc
)
4230 int count
, err
, i
, rid
;
4233 sc
->msix_table_res
= bus_alloc_resource_any(sc
->dev
, SYS_RES_MEMORY
,
4236 if (sc
->msix_table_res
== NULL
) {
4237 device_printf(sc
->dev
, "couldn't alloc MSIX table res\n");
4241 count
= sc
->num_slices
;
4242 err
= pci_alloc_msix(sc
->dev
, &count
);
4244 device_printf(sc
->dev
, "pci_alloc_msix: failed, wanted %d"
4245 "err = %d \n", sc
->num_slices
, err
);
4246 goto abort_with_msix_table
;
4248 if (count
< sc
->num_slices
) {
4249 device_printf(sc
->dev
, "pci_alloc_msix: need %d, got %d\n",
4250 count
, sc
->num_slices
);
4251 device_printf(sc
->dev
,
4252 "Try setting hw.mxge.max_slices to %d\n",
4255 goto abort_with_msix
;
4257 bytes
= sizeof (*sc
->msix_irq_res
) * sc
->num_slices
;
4258 sc
->msix_irq_res
= kmalloc(bytes
, M_DEVBUF
, M_NOWAIT
|M_ZERO
);
4259 if (sc
->msix_irq_res
== NULL
) {
4261 goto abort_with_msix
;
4264 for (i
= 0; i
< sc
->num_slices
; i
++) {
4266 sc
->msix_irq_res
[i
] = bus_alloc_resource_any(sc
->dev
,
4269 if (sc
->msix_irq_res
[i
] == NULL
) {
4270 device_printf(sc
->dev
, "couldn't allocate IRQ res"
4271 " for message %d\n", i
);
4273 goto abort_with_res
;
4277 bytes
= sizeof (*sc
->msix_ih
) * sc
->num_slices
;
4278 sc
->msix_ih
= kmalloc(bytes
, M_DEVBUF
, M_NOWAIT
|M_ZERO
);
4280 for (i
= 0; i
< sc
->num_slices
; i
++) {
4281 err
= bus_setup_intr(sc
->dev
, sc
->msix_irq_res
[i
],
4282 INTR_TYPE_NET
| INTR_MPSAFE
,
4283 #if __FreeBSD_version > 700030
4286 mxge_intr
, &sc
->ss
[i
], &sc
->msix_ih
[i
]);
4288 device_printf(sc
->dev
, "couldn't setup intr for "
4290 goto abort_with_intr
;
4295 device_printf(sc
->dev
, "using %d msix IRQs:",
4297 for (i
= 0; i
< sc
->num_slices
; i
++)
4298 printf(" %ld", rman_get_start(sc
->msix_irq_res
[i
]));
/* error unwind: tear down any handlers already installed */
4304 for (i
= 0; i
< sc
->num_slices
; i
++) {
4305 if (sc
->msix_ih
[i
] != NULL
) {
4306 bus_teardown_intr(sc
->dev
, sc
->msix_irq_res
[i
],
4308 sc
->msix_ih
[i
] = NULL
;
4311 kfree(sc
->msix_ih
, M_DEVBUF
);
4315 for (i
= 0; i
< sc
->num_slices
; i
++) {
4317 if (sc
->msix_irq_res
[i
] != NULL
)
4318 bus_release_resource(sc
->dev
, SYS_RES_IRQ
, rid
,
4319 sc
->msix_irq_res
[i
]);
4320 sc
->msix_irq_res
[i
] = NULL
;
4322 kfree(sc
->msix_irq_res
, M_DEVBUF
);
4326 pci_release_msi(sc
->dev
);
4328 abort_with_msix_table
:
4329 bus_release_resource(sc
->dev
, SYS_RES_MEMORY
, PCIR_BAR(2),
4330 sc
->msix_table_res
);
/*
 * mxge_add_single_irq -- single-slice interrupt path: if exactly
 * one MSI message is available and can be allocated, use MSI
 * (rid 1), otherwise fall back to legacy INTx (sc->legacy_irq,
 * rid 0).  Allocate the shareable, active IRQ resource and hook
 * mxge_intr with &sc->ss[0] as its argument (cookie in sc->ih).
 * On bus_setup_intr failure the IRQ resource and, for MSI, the
 * message are released.
 *
 * NOTE(review): lossy extraction -- the rid / sc->legacy_irq
 * assignments inside the MSI branch and the error checks are
 * elided here (original line numbers skip); verify against the
 * full source.
 */
4336 mxge_add_single_irq(mxge_softc_t
*sc
)
4338 int count
, err
, rid
;
4340 count
= pci_msi_count(sc
->dev
);
4341 if (count
== 1 && pci_alloc_msi(sc
->dev
, &count
) == 0) {
4347 sc
->irq_res
= bus_alloc_resource(sc
->dev
, SYS_RES_IRQ
, &rid
, 0, ~0,
4348 1, RF_SHAREABLE
| RF_ACTIVE
);
4349 if (sc
->irq_res
== NULL
) {
4350 device_printf(sc
->dev
, "could not alloc interrupt\n");
4354 device_printf(sc
->dev
, "using %s irq %ld\n",
4355 sc
->legacy_irq
? "INTx" : "MSI",
4356 rman_get_start(sc
->irq_res
));
4357 err
= bus_setup_intr(sc
->dev
, sc
->irq_res
,
4358 INTR_TYPE_NET
| INTR_MPSAFE
,
4359 #if __FreeBSD_version > 700030
4362 mxge_intr
, &sc
->ss
[0], &sc
->ih
);
/* error unwind: release the IRQ resource and any MSI message */
4364 bus_release_resource(sc
->dev
, SYS_RES_IRQ
,
4365 sc
->legacy_irq
? 0 : 1, sc
->irq_res
);
4366 if (!sc
->legacy_irq
)
4367 pci_release_msi(sc
->dev
);
/*
 * mxge_rem_msix_irqs -- undo mxge_add_msix_irqs(): tear down each
 * per-slice interrupt handler, release each per-slice IRQ resource,
 * free the sc->msix_ih / sc->msix_irq_res bookkeeping arrays,
 * release the MSI-X table memory resource (PCIR_BAR(2)) and the
 * MSI-X messages.  Mirrors the unwind chain inside
 * mxge_add_msix_irqs() itself.
 *
 * NOTE(review): lossy extraction -- the per-slice rid computation
 * and the loop-body details are elided here (original line numbers
 * skip); verify against the full source.
 */
4373 mxge_rem_msix_irqs(mxge_softc_t
*sc
)
4377 for (i
= 0; i
< sc
->num_slices
; i
++) {
4378 if (sc
->msix_ih
[i
] != NULL
) {
4379 bus_teardown_intr(sc
->dev
, sc
->msix_irq_res
[i
],
4381 sc
->msix_ih
[i
] = NULL
;
4384 kfree(sc
->msix_ih
, M_DEVBUF
);
4386 for (i
= 0; i
< sc
->num_slices
; i
++) {
4388 if (sc
->msix_irq_res
[i
] != NULL
)
4389 bus_release_resource(sc
->dev
, SYS_RES_IRQ
, rid
,
4390 sc
->msix_irq_res
[i
]);
4391 sc
->msix_irq_res
[i
] = NULL
;
4393 kfree(sc
->msix_irq_res
, M_DEVBUF
);
4395 bus_release_resource(sc
->dev
, SYS_RES_MEMORY
, PCIR_BAR(2),
4396 sc
->msix_table_res
);
4398 pci_release_msi(sc
->dev
);
/*
 * mxge_rem_single_irq -- undo mxge_add_single_irq(): detach the
 * handler (sc->ih), release the IRQ resource (rid 0 for legacy
 * INTx, rid 1 for MSI, per sc->legacy_irq) and, when MSI was used,
 * release the MSI message.
 */
4403 mxge_rem_single_irq(mxge_softc_t
*sc
)
4405 bus_teardown_intr(sc
->dev
, sc
->irq_res
, sc
->ih
);
4406 bus_release_resource(sc
->dev
, SYS_RES_IRQ
,
4407 sc
->legacy_irq
? 0 : 1, sc
->irq_res
);
4408 if (!sc
->legacy_irq
)
4409 pci_release_msi(sc
->dev
);
/*
 * mxge_rem_irq -- dispatch interrupt teardown: MSI-X path when more
 * than one slice is configured, single-IRQ path otherwise.
 */
4413 mxge_rem_irq(mxge_softc_t
*sc
)
4415 if (sc
->num_slices
> 1)
4416 mxge_rem_msix_irqs(sc
);
4418 mxge_rem_single_irq(sc
);
/*
 * mxge_add_irq -- dispatch interrupt setup: MSI-X path when more
 * than one slice is configured, single-IRQ path otherwise.
 *
 * NOTE(review): the "if (0 && err == 0 && sc->num_slices > 1)"
 * branch below is unreachable (the leading 0 && makes the whole
 * condition false) -- it looks like a deliberately disabled
 * tear-down-and-retry debugging path.  Confirm intent before
 * removing or re-enabling it.
 */
4422 mxge_add_irq(mxge_softc_t
*sc
)
4426 if (sc
->num_slices
> 1)
4427 err
= mxge_add_msix_irqs(sc
);
4429 err
= mxge_add_single_irq(sc
);
4431 if (0 && err
== 0 && sc
->num_slices
> 1) {
4432 mxge_rem_msix_irqs(sc
);
4433 err
= mxge_add_msix_irqs(sc
);
/*
 * mxge_attach -- device attach entry point.  In order: fetch
 * tunables; create the parent busdma tag; allocate the ifnet;
 * init the cmd and driver locks plus the tick callout; set up PCI
 * config space; map the board SRAM (sram_size leaves room for the
 * two 48KB firmware slots, the 32KB scratch area and 0x100 bytes);
 * copy out and parse the NUL-terminated EEPROM strings; allocate
 * the command / zeropad / dmabench DMA areas; select and load
 * firmware; probe and allocate slices; reset; allocate rings; hook
 * interrupts; then configure ifnet capabilities (RX/TX csum, TSO4,
 * LRO, VLAN offload, jumbo when the firmware's max MTU >= 9000),
 * ifmedia, ether_ifattach, optional initial MTU change, and
 * sysctls.  Failures unwind through the abort_with_* goto chain at
 * the bottom.
 *
 * NOTE(review): lossy extraction -- most "if (err != 0)" checks,
 * several labels (abort_with_nothing/lock/mem_res/... bodies), and
 * the success return are elided here (original line numbers skip);
 * verify against the full source before editing.
 *
 * NOTE(review): after mxge_alloc_rings() fails (original 4552-4553)
 * the code jumps to abort_with_dmabench, which -- judging by the
 * label layout below, where mxge_free_slices() at 4611 precedes the
 * abort_with_dmabench label at 4612 -- skips freeing the slices
 * allocated just above.  Looks like it should be
 * "goto abort_with_slices"; confirm against upstream.
 */
4440 mxge_attach(device_t dev
)
4442 mxge_softc_t
*sc
= device_get_softc(dev
);
4447 mxge_fetch_tunables(sc
);
4449 err
= bus_dma_tag_create(NULL
, /* parent */
4452 BUS_SPACE_MAXADDR
, /* low */
4453 BUS_SPACE_MAXADDR
, /* high */
4454 NULL
, NULL
, /* filter */
4455 65536 + 256, /* maxsize */
4456 MXGE_MAX_SEND_DESC
, /* num segs */
4457 65536, /* maxsegsize */
4459 NULL
, NULL
, /* lock */
4460 &sc
->parent_dmat
); /* tag */
4463 device_printf(sc
->dev
, "Err %d allocating parent dmat\n",
4465 goto abort_with_nothing
;
4468 ifp
= sc
->ifp
= if_alloc(IFT_ETHER
);
4470 device_printf(dev
, "can not if_alloc()\n");
4472 goto abort_with_parent_dmat
;
4474 if_initname(ifp
, device_get_name(dev
), device_get_unit(dev
));
4476 snprintf(sc
->cmd_lock_name
, sizeof(sc
->cmd_lock_name
), "%s:cmd",
4477 device_get_nameunit(dev
));
4478 lock_init(&sc
->cmd_lock
, sc
->cmd_lock_name
, 0, LK_CANRECURSE
);
4479 snprintf(sc
->driver_lock_name
, sizeof(sc
->driver_lock_name
),
4480 "%s:drv", device_get_nameunit(dev
));
4481 lock_init(&sc
->driver_lock
, sc
->driver_lock_name
,
4484 callout_init(&sc
->co_hdl
);
4486 mxge_setup_cfg_space(sc
);
4488 /* Map the board into the kernel */
4490 sc
->mem_res
= bus_alloc_resource(dev
, SYS_RES_MEMORY
, &rid
, 0,
4492 if (sc
->mem_res
== NULL
) {
4493 device_printf(dev
, "could not map memory\n");
4495 goto abort_with_lock
;
4497 sc
->sram
= rman_get_virtual(sc
->mem_res
);
4498 sc
->sram_size
= 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
4499 if (sc
->sram_size
> rman_get_size(sc
->mem_res
)) {
4500 device_printf(dev
, "impossible memory region size %ld\n",
4501 rman_get_size(sc
->mem_res
));
4503 goto abort_with_mem_res
;
4506 /* make NULL terminated copy of the EEPROM strings section of
4508 bzero(sc
->eeprom_strings
, MXGE_EEPROM_STRINGS_SIZE
);
4509 bus_space_read_region_1(rman_get_bustag(sc
->mem_res
),
4510 rman_get_bushandle(sc
->mem_res
),
4511 sc
->sram_size
- MXGE_EEPROM_STRINGS_SIZE
,
4513 MXGE_EEPROM_STRINGS_SIZE
- 2);
4514 err
= mxge_parse_strings(sc
);
4516 goto abort_with_mem_res
;
4518 /* Enable write combining for efficient use of PCIe bus */
4521 /* Allocate the out of band dma memory */
4522 err
= mxge_dma_alloc(sc
, &sc
->cmd_dma
,
4523 sizeof (mxge_cmd_t
), 64);
4525 goto abort_with_mem_res
;
4526 sc
->cmd
= (mcp_cmd_response_t
*) sc
->cmd_dma
.addr
;
4527 err
= mxge_dma_alloc(sc
, &sc
->zeropad_dma
, 64, 64);
4529 goto abort_with_cmd_dma
;
4531 err
= mxge_dma_alloc(sc
, &sc
->dmabench_dma
, 4096, 4096);
4533 goto abort_with_zeropad_dma
;
4535 /* select & load the firmware */
4536 err
= mxge_select_firmware(sc
);
4538 goto abort_with_dmabench
;
4539 sc
->intr_coal_delay
= mxge_intr_coal_delay
;
4541 mxge_slice_probe(sc
);
4542 err
= mxge_alloc_slices(sc
);
4544 goto abort_with_dmabench
;
4546 err
= mxge_reset(sc
, 0);
4548 goto abort_with_slices
;
4550 err
= mxge_alloc_rings(sc
);
4552 device_printf(sc
->dev
, "failed to allocate rings\n");
4553 goto abort_with_dmabench
;
4556 err
= mxge_add_irq(sc
);
4558 device_printf(sc
->dev
, "failed to add irq\n");
4559 goto abort_with_rings
;
4562 ifp
->if_baudrate
= IF_Gbps(10UL);
4563 ifp
->if_capabilities
= IFCAP_RXCSUM
| IFCAP_TXCSUM
| IFCAP_TSO4
|
4566 ifp
->if_capabilities
|= IFCAP_LRO
;
4569 #ifdef MXGE_NEW_VLAN_API
4570 ifp
->if_capabilities
|= IFCAP_VLAN_HWTAGGING
| IFCAP_VLAN_HWCSUM
;
4573 sc
->max_mtu
= mxge_max_mtu(sc
);
4574 if (sc
->max_mtu
>= 9000)
4575 ifp
->if_capabilities
|= IFCAP_JUMBO_MTU
;
4577 device_printf(dev
, "MTU limited to %d. Install "
4578 "latest firmware for 9000 byte jumbo support\n",
4579 sc
->max_mtu
- ETHER_HDR_LEN
);
4580 ifp
->if_hwassist
= CSUM_TCP
| CSUM_UDP
| CSUM_TSO
;
4581 ifp
->if_capenable
= ifp
->if_capabilities
;
4582 if (sc
->lro_cnt
== 0)
4583 ifp
->if_capenable
&= ~IFCAP_LRO
;
4585 ifp
->if_init
= mxge_init
;
4587 ifp
->if_flags
= IFF_BROADCAST
| IFF_SIMPLEX
| IFF_MULTICAST
;
4588 ifp
->if_ioctl
= mxge_ioctl
;
4589 ifp
->if_start
= mxge_start
;
4590 /* Initialise the ifmedia structure */
4591 ifmedia_init(&sc
->media
, 0, mxge_media_change
,
4593 mxge_set_media(sc
, IFM_ETHER
| IFM_AUTO
);
4594 mxge_media_probe(sc
);
4596 ether_ifattach(ifp
, sc
->mac_addr
);
4597 /* ether_ifattach sets mtu to ETHERMTU */
4598 if (mxge_initial_mtu
!= ETHERMTU
)
4599 mxge_change_mtu(sc
, mxge_initial_mtu
);
4601 mxge_add_sysctls(sc
);
4602 #ifdef IFNET_BUF_RING
4603 ifp
->if_transmit
= mxge_transmit
;
4604 ifp
->if_qflush
= mxge_qflush
;
/* error unwind chain: release in reverse order of acquisition */
4609 mxge_free_rings(sc
);
4611 mxge_free_slices(sc
);
4612 abort_with_dmabench
:
4613 mxge_dma_free(&sc
->dmabench_dma
);
4614 abort_with_zeropad_dma
:
4615 mxge_dma_free(&sc
->zeropad_dma
);
4617 mxge_dma_free(&sc
->cmd_dma
);
4619 bus_release_resource(dev
, SYS_RES_MEMORY
, PCIR_BARS
, sc
->mem_res
);
4621 pci_disable_busmaster(dev
);
4622 lockuninit(&sc
->cmd_lock
);
4623 lockuninit(&sc
->driver_lock
);
4625 abort_with_parent_dmat
:
4626 bus_dma_tag_destroy(sc
->parent_dmat
);
/*
 * mxge_detach -- device detach entry point, the mirror image of
 * mxge_attach(): refuse to detach while VLANs are active; close the
 * running interface under the driver lock; detach the ifnet; drain
 * the tick callout; remove ifmedia and sysctls; quiesce firmware
 * RDMA; then free rings, slices, the dmabench / zeropad / cmd DMA
 * regions, release the memory BAR, disable bus mastering, destroy
 * both locks and the parent busdma tag.
 *
 * NOTE(review): the lock is taken with lockmgr(&sc->driver_lock,
 * LK_EXCLUSIVE) at original line 4642 but dropped with
 * lock(&sc->driver_lock, LK_RELEASE) at 4646 -- mismatched APIs for
 * the same lock.  The release should presumably also be
 * lockmgr(..., LK_RELEASE); confirm against the full source (this
 * extraction is lossy and elides lines between the two calls).
 */
4633 mxge_detach(device_t dev
)
4635 mxge_softc_t
*sc
= device_get_softc(dev
);
4637 if (mxge_vlans_active(sc
)) {
4638 device_printf(sc
->dev
,
4639 "Detach vlans before removing module\n");
4642 lockmgr(&sc
->driver_lock
, LK_EXCLUSIVE
);
4644 if (sc
->ifp
->if_drv_flags
& IFF_DRV_RUNNING
)
4646 lock(&sc
->driver_lock
, LK_RELEASE
);
4647 ether_ifdetach(sc
->ifp
);
4648 callout_drain(&sc
->co_hdl
);
4649 ifmedia_removeall(&sc
->media
);
4650 mxge_dummy_rdma(sc
, 0);
4651 mxge_rem_sysctls(sc
);
/* release everything mxge_attach() acquired, in reverse order */
4653 mxge_free_rings(sc
);
4654 mxge_free_slices(sc
);
4655 mxge_dma_free(&sc
->dmabench_dma
);
4656 mxge_dma_free(&sc
->zeropad_dma
);
4657 mxge_dma_free(&sc
->cmd_dma
);
4658 bus_release_resource(dev
, SYS_RES_MEMORY
, PCIR_BARS
, sc
->mem_res
);
4659 pci_disable_busmaster(dev
);
4660 lockuninit(&sc
->cmd_lock
);
4661 lockuninit(&sc
->driver_lock
);
4663 bus_dma_tag_destroy(sc
->parent_dmat
);
4668 mxge_shutdown(device_t dev
)
4674 This file uses Myri10GE driver indentation.
4677 c-file-style:"linux"