e1000 - Make igb driver compile on DragonFly
sys/dev/netif/e1000/if_igb.c
/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>

#ifdef IGB_IEEE1588
#include <sys/ieee1588.h>
#endif

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#ifdef NET_LRO
#include <netinet/tcp_lro.h>
#endif
#include <netinet/udp.h>

#include <sys/in_cksum.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.1";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *,
		    union e1000_adv_rx_desc *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct rx_ring *, int);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
#if NET_TSO
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
#endif
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int, u8);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);

static void	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
 * Header split has seemed to be beneficial in
 * many circumstances tested, however there have
 * been some stability issues, so the default is
 * off.
 */
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/*
** Shadow VFTA table, this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			ksprintf(adapter_name, "%s %s",
			    igb_strings[ent->index],
			    igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	sysctl_ctx_init(&adapter->sysctl_ctx);
	adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
	    device_get_nameunit(adapter->dev),
	    CTLFLAG_RD, 0, "");
	if (adapter->sysctl_tree == NULL) {
		device_printf(adapter->dev, "can't add sysctl node\n");
		error = ENOMEM;
		goto err_sysctl;
	}

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_INT(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	callout_init(&adapter->timer);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
err_sysctl:
	sysctl_ctx_free(&adapter->sysctl_ctx);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunks != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (adapter->ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(adapter->ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	//callout_drain(&adapter->timer);
	callout_stop(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	sysctl_ctx_free(&adapter->sysctl_ctx);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_flags & IFF_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
	    IFF_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!ifq_is_empty(&ifp->if_snd)) {

		m_head = ifq_dequeue(&ifp->if_snd, NULL);
		if (m_head == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure. In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_flags |= IFF_OACTIVE;
			ifq_prepend(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_check = TRUE;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_flags & IFF_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i = 0, err = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	txr = &adapter->tx_rings[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else
		err = drbr_enqueue(ifp, txr->br, m);

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) !=
	    IFF_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;
	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cred)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_flags & IFF_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_flags & IFF_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_flags & IFF_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
#ifdef NET_TSO
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
#ifdef NET_LRO
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
#endif
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(adapter);
#if 0
		VLAN_CAPABILITIES(ifp);
#endif
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

#ifdef NET_TSO
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;
#endif

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set up VLAN tag offload and filter */
	igb_setup_vlan_hw_support(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_flags & IFF_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!ifq_is_empty(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	struct ifnet	*ifp = adapter->ifp;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	/* RX first */
	do {
		more = igb_rxeof(rxr, -1);
	} while (loop-- && more);

	if (IGB_TX_TRYLOCK(txr)) {
		loop = IGB_MAX_LOOP;
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#if __FreeBSD_version >= 800000
		igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!ifq_is_empty(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	/* Reenable this interrupt */
#ifdef DEVICE_POLLING
	if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
#define FILTER_STRAY
#define FILTER_HANDLED
static void
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running. This is
	 * cheap, just assume that it is needed. This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter	*adapter = ifp->if_softc;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct tx_ring	*txr = adapter->tx_rings;
	u32		reg_icr, rx_done = 0;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_flags & IFF_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			taskqueue_enqueue(adapter->tq, &adapter->link_task);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	/* TODO: rx_count */
	rx_done = igb_rxeof(rxr, count) ? 1 : 0;

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	if (!ifq_is_empty(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(rxr, adapter->rx_process_limit);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gig */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= 0x8000000;

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	taskqueue_enqueue(adapter->tq, &adapter->link_task);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}

/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors.
 *  used by the 82575 adapter.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0;

	m_head = *m_headp;

	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Now do we at least have a minimal? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
	    *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, MB_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
		    *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
#ifdef NET_TSO
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else
			return (ENXIO);
	} else
#endif
		if (igb_tx_ctx_setup(txr, m_head))
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		olinfo_status |= txr->me << 4;

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;
	txr->watchdog_time = ticks;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);
}

static void
igb_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

static void
igb_disable_promisc(struct adapter *adapter)
{
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	reg_rctl &= (~E1000_RCTL_UPE);
	reg_rctl &= (~E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}

/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
 *
 **********************************************************************/

static void
igb_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	u32 reg_rctl = 0;
	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];

	int	mcnt = 0;

	IOCTL_DEBUGOUT("igb_set_multi: begin");

#if 0
#if __FreeBSD_version < 800000
	IF_ADDR_LOCK(ifp);
#else
	if_maddr_rlock(ifp);
#endif
#endif

	LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
#if 0
#if __FreeBSD_version < 800000
	IF_ADDR_UNLOCK(ifp);
#else
	if_maddr_runlock(ifp);
#endif
#endif

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
}

/*********************************************************************
 *  Timer routine:
 *  This routine checks for link status,
 *  updates statistics, and does the watchdog.
 *
 **********************************************************************/

static void
igb_local_timer(void *arg)
{
	struct adapter		*adapter = arg;

	IGB_CORE_LOCK(adapter);

	struct ifnet		*ifp = adapter->ifp;
	device_t		dev = adapter->dev;
	struct tx_ring		*txr = adapter->tx_rings;

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_update_link_status(adapter);
	igb_update_stats_counters(adapter);

	if (igb_display_debug_stats && ifp->if_flags & IFF_RUNNING)
		igb_print_hw_stats(adapter);

	/*
	** Watchdog: check for time since any descriptor was cleaned
	*/
	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		if (txr->watchdog_check == FALSE)
			continue;
		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
			goto timeout;
	}

	/* Trigger an RX interrupt on all queues */
#ifdef DEVICE_POLLING
	if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
		E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	IGB_CORE_UNLOCK(adapter);
	return;

timeout:
	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(dev,"TX(%d) desc avail = %d,"
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);
	adapter->ifp->if_flags &= ~IFF_RUNNING;
	adapter->watchdog_events++;
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct ifnet *ifp = adapter->ifp;
	device_t dev = adapter->dev;
	struct tx_ring *txr = adapter->tx_rings;
	u32 link_check = 0;

	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now we check if a transition has happened */
	if (link_check && (adapter->link_active == 0)) {
		e1000_get_speed_and_duplex(&adapter->hw,
		    &adapter->link_speed, &adapter->link_duplex);
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		ifp->if_baudrate = adapter->link_speed * 1000000;
		ifp->if_link_state = LINK_STATE_UP;
		if_link_state_change(ifp);
	} else if (!link_check && (adapter->link_active == 1)) {
		ifp->if_baudrate = adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		ifp->if_link_state = LINK_STATE_DOWN;
		if_link_state_change(ifp);
		/* Turn off watchdogs */
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			txr->watchdog_check = FALSE;
	}
}

2010 /*********************************************************************
2012 * This routine disables all traffic on the adapter by issuing a
2013 * global reset on the MAC and deallocates TX/RX buffers.
2015 **********************************************************************/
2017 static void
2018 igb_stop(void *arg)
2020 struct adapter *adapter = arg;
2021 struct ifnet *ifp = adapter->ifp;
2022 struct tx_ring *txr = adapter->tx_rings;
2024 IGB_CORE_LOCK_ASSERT(adapter);
2026 INIT_DEBUGOUT("igb_stop: begin");
2028 igb_disable_intr(adapter);
2030 callout_stop(&adapter->timer);
2032 /* Tell the stack that the interface is no longer active */
2033 ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2035 /* Unarm watchdog timer. */
2036 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2037 IGB_TX_LOCK(txr);
2038 txr->watchdog_check = FALSE;
2039 IGB_TX_UNLOCK(txr);
2042 e1000_reset_hw(&adapter->hw);
2043 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2047 /*********************************************************************
2049 * Determine hardware revision.
2051 **********************************************************************/
2052 static void
2053 igb_identify_hardware(struct adapter *adapter)
2055 device_t dev = adapter->dev;
2057 /* Make sure our PCI config space has the necessary stuff set */
2058 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2059 if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2060 (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2061 device_printf(dev, "Memory Access and/or Bus Master bits "
2062 "were not set!\n");
2063 adapter->hw.bus.pci_cmd_word |=
2064 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2065 pci_write_config(dev, PCIR_COMMAND,
2066 adapter->hw.bus.pci_cmd_word, 2);
2069 /* Save off the information about this board */
2070 adapter->hw.vendor_id = pci_get_vendor(dev);
2071 adapter->hw.device_id = pci_get_device(dev);
2072 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2073 adapter->hw.subsystem_vendor_id =
2074 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2075 adapter->hw.subsystem_device_id =
2076 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2078 /* Do Shared Code Init and Setup */
2079 if (e1000_set_mac_type(&adapter->hw)) {
2080 device_printf(dev, "Setup init failure\n");
2081 return;
2085 static int
2086 igb_allocate_pci_resources(struct adapter *adapter)
2088 device_t dev = adapter->dev;
2089 int rid;
2091 rid = PCIR_BAR(0);
2092 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2093 &rid, RF_ACTIVE);
2094 if (adapter->pci_mem == NULL) {
2095 device_printf(dev, "Unable to allocate bus resource: memory\n");
2096 return (ENXIO);
2098 adapter->osdep.mem_bus_space_tag =
2099 rman_get_bustag(adapter->pci_mem);
2100 adapter->osdep.mem_bus_space_handle =
2101 rman_get_bushandle(adapter->pci_mem);
2102 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
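/*
 * Note: hw_addr is not dereferenced as a mapped register base here; the
 * shared code's register macros are assumed to go through the osdep
 * bus-space tag/handle (via hw->back, set below), so storing a pointer
 * to the handle mainly satisfies the shared code's non-NULL checks.
 * (Assumption about e1000_osdep.h, which is not shown in this file.)
 */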
2104 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2106 /* This will setup either MSI/X or MSI */
2107 adapter->msix = igb_setup_msix(adapter);
2108 adapter->hw.back = &adapter->osdep;
2110 return (0);
2113 /*********************************************************************
2115 * Setup the Legacy or MSI Interrupt handler
2117 **********************************************************************/
2118 static int
2119 igb_allocate_legacy(struct adapter *adapter)
2121 device_t dev = adapter->dev;
2122 int error, rid = 0;
2124 /* Turn off all interrupts */
2125 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2127 /* MSI RID is 1 */
2128 if (adapter->msix == 1)
2129 rid = 1;
2131 /* We allocate a single interrupt resource */
2132 adapter->res = bus_alloc_resource_any(dev,
2133 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2134 if (adapter->res == NULL) {
2135 device_printf(dev, "Unable to allocate bus resource: "
2136 "interrupt\n");
2137 return (ENXIO);
2140 /*
2141 * Try allocating a fast interrupt and the associated deferred
2142 * processing contexts.
2143 */
2144 TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2145 /* Make tasklet for deferred link handling */
2146 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2147 adapter->tq = taskqueue_create("igb_taskq", M_NOWAIT,
2148 taskqueue_thread_enqueue, &adapter->tq);
2149 taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2150 device_get_nameunit(adapter->dev));
2151 if ((error = bus_setup_intr(dev, adapter->res,
2152 /*INTR_TYPE_NET |*/ INTR_MPSAFE, igb_irq_fast,
2153 adapter, &adapter->tag, NULL)) != 0) {
2154 device_printf(dev, "Failed to register fast interrupt "
2155 "handler: %d\n", error);
2156 taskqueue_free(adapter->tq);
2157 adapter->tq = NULL;
2158 return (error);
2161 return (0);
2165 /*********************************************************************
2167 * Setup the MSIX Queue Interrupt handlers:
2169 **********************************************************************/
2170 static int
2171 igb_allocate_msix(struct adapter *adapter)
2173 device_t dev = adapter->dev;
2174 struct igb_queue *que = adapter->queues;
2175 int error, rid, vector = 0;
2178 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2179 rid = vector + 1;
2180 que->res = bus_alloc_resource_any(dev,
2181 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2182 if (que->res == NULL) {
2183 device_printf(dev,
2184 "Unable to allocate bus resource: "
2185 "MSIX Queue Interrupt\n");
2186 return (ENXIO);
2188 error = bus_setup_intr(dev, que->res,
2189 /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2190 igb_msix_que, que, &que->tag, NULL);
2191 if (error) {
2192 que->res = NULL;
2193 device_printf(dev, "Failed to register Queue handler");
2194 return (error);
2196 que->msix = vector;
2197 if (adapter->hw.mac.type == e1000_82575)
2198 que->eims = E1000_EICR_TX_QUEUE0 << i;
2199 else
2200 que->eims = 1 << vector;
2201 /*
2202 ** Bind the msix vector, and thus the
2203 ** rings to the corresponding cpu.
2204 */
2205 #if 0
2206 if (adapter->num_queues > 1)
2207 bus_bind_intr(dev, que->res, i);
2208 #endif
2209 /* Make tasklet for deferred handling */
2210 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2211 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2212 taskqueue_thread_enqueue, &que->tq);
2213 taskqueue_start_threads(&que->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s que",
2214 device_get_nameunit(adapter->dev));
2217 /* And Link */
2218 rid = vector + 1;
2219 adapter->res = bus_alloc_resource_any(dev,
2220 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2221 if (adapter->res == NULL) {
2222 device_printf(dev,
2223 "Unable to allocate bus resource: "
2224 "MSIX Link Interrupt\n");
2225 return (ENXIO);
2227 if ((error = bus_setup_intr(dev, adapter->res,
2228 /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2229 igb_msix_link, adapter, &adapter->tag, NULL)) != 0) {
2230 device_printf(dev, "Failed to register Link handler");
2231 return (error);
2233 adapter->linkvec = vector;
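/*
 * The link interrupt takes the vector after the per-queue vectors.  The
 * "+ 1" when forming the rid above reflects that SYS_RES_IRQ rids for
 * MSI/MSI-X messages are allocated starting at 1 while vectors are
 * counted from 0 (an assumption about the bus layer, consistent with
 * the MSI case in igb_allocate_legacy()).
 */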
2235 /* Make tasklet for deferred handling */
2236 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2237 adapter->tq = taskqueue_create("igb_link", M_NOWAIT,
2238 taskqueue_thread_enqueue, &adapter->tq);
2239 taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s link",
2240 device_get_nameunit(adapter->dev));
2242 return (0);
2246 static void
2247 igb_configure_queues(struct adapter *adapter)
2249 struct e1000_hw *hw = &adapter->hw;
2250 struct igb_queue *que;
2251 u32 tmp, ivar = 0;
2252 u32 newitr = IGB_DEFAULT_ITR;
2254 /* First turn on RSS capability */
2255 if (adapter->hw.mac.type > e1000_82575)
2256 E1000_WRITE_REG(hw, E1000_GPIE,
2257 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2258 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2260 /* Turn on MSIX */
2261 switch (adapter->hw.mac.type) {
2262 case e1000_82580:
2263 /* RX entries */
2264 for (int i = 0; i < adapter->num_queues; i++) {
2265 u32 index = i >> 1;
2266 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2267 que = &adapter->queues[i];
2268 if (i & 1) {
2269 ivar &= 0xFF00FFFF;
2270 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2271 } else {
2272 ivar &= 0xFFFFFF00;
2273 ivar |= que->msix | E1000_IVAR_VALID;
2275 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2277 /* TX entries */
2278 for (int i = 0; i < adapter->num_queues; i++) {
2279 u32 index = i >> 1;
2280 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2281 que = &adapter->queues[i];
2282 if (i & 1) {
2283 ivar &= 0x00FFFFFF;
2284 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2285 } else {
2286 ivar &= 0xFFFF00FF;
2287 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2289 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2290 adapter->eims_mask |= que->eims;
2293 /* And for the link interrupt */
2294 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2295 adapter->link_mask = 1 << adapter->linkvec;
2296 adapter->eims_mask |= adapter->link_mask;
2297 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2298 break;
2299 case e1000_82576:
2300 /* RX entries */
2301 for (int i = 0; i < adapter->num_queues; i++) {
2302 u32 index = i & 0x7; /* Each IVAR has two entries */
2303 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2304 que = &adapter->queues[i];
2305 if (i < 8) {
2306 ivar &= 0xFFFFFF00;
2307 ivar |= que->msix | E1000_IVAR_VALID;
2308 } else {
2309 ivar &= 0xFF00FFFF;
2310 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2312 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2313 adapter->eims_mask |= que->eims;
2315 /* TX entries */
2316 for (int i = 0; i < adapter->num_queues; i++) {
2317 u32 index = i & 0x7; /* Each IVAR has two entries */
2318 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2319 que = &adapter->queues[i];
2320 if (i < 8) {
2321 ivar &= 0xFFFF00FF;
2322 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2323 } else {
2324 ivar &= 0x00FFFFFF;
2325 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2327 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2328 adapter->eims_mask |= que->eims;
2331 /* And for the link interrupt */
2332 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2333 adapter->link_mask = 1 << adapter->linkvec;
2334 adapter->eims_mask |= adapter->link_mask;
2335 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2336 break;
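/*
 * Reading the masks above: each 32-bit IVAR register carries four 8-bit
 * vector entries.  On the 82576, queue i < 8 places its RX vector in
 * bits 7:0 and its TX vector in bits 15:8 of IVAR[i], while queue
 * i >= 8 uses bits 23:16 and 31:24 of IVAR[i & 7]; e.g. with 10 queues,
 * queue 1 and queue 9 share IVAR1.  The 82580 case above instead packs
 * adjacent queue pairs, putting queue i into the low or high half of
 * IVAR[i >> 1] depending on i & 1.
 */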
2338 case e1000_82575:
2339 /* enable MSI-X support*/
2340 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2341 tmp |= E1000_CTRL_EXT_PBA_CLR;
2342 /* Auto-Mask interrupts upon ICR read. */
2343 tmp |= E1000_CTRL_EXT_EIAME;
2344 tmp |= E1000_CTRL_EXT_IRCA;
2345 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2347 /* Queues */
2348 for (int i = 0; i < adapter->num_queues; i++) {
2349 que = &adapter->queues[i];
2350 tmp = E1000_EICR_RX_QUEUE0 << i;
2351 tmp |= E1000_EICR_TX_QUEUE0 << i;
2352 que->eims = tmp;
2353 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2354 i, que->eims);
2355 adapter->eims_mask |= que->eims;
2358 /* Link */
2359 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2360 E1000_EIMS_OTHER);
2361 adapter->link_mask |= E1000_EIMS_OTHER;
2362 adapter->eims_mask |= adapter->link_mask;
2363 default:
2364 break;
2367 /* Set the starting interrupt rate */
2368 if (hw->mac.type == e1000_82575)
2369 newitr |= newitr << 16;
2370 else
2371 newitr |= 0x8000000;
2373 for (int i = 0; i < adapter->num_queues; i++) {
2374 que = &adapter->queues[i];
2375 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2378 return;
2382 static void
2383 igb_free_pci_resources(struct adapter *adapter)
2385 struct igb_queue *que = adapter->queues;
2386 device_t dev = adapter->dev;
2387 int rid;
2389 /*
2390 ** There is a slight possibility of a failure mode
2391 ** in attach that will result in entering this function
2392 ** before interrupt resources have been initialized, and
2393 ** in that case we do not want to execute the loops below.
2394 ** We can detect this reliably by the state of the adapter
2395 ** res pointer.
2396 */
2397 if (adapter->res == NULL)
2398 goto mem;
2400 /*
2401 * First release all the interrupt resources:
2402 */
2403 for (int i = 0; i < adapter->num_queues; i++, que++) {
2404 rid = que->msix + 1;
2405 if (que->tag != NULL) {
2406 bus_teardown_intr(dev, que->res, que->tag);
2407 que->tag = NULL;
2409 if (que->res != NULL)
2410 bus_release_resource(dev,
2411 SYS_RES_IRQ, rid, que->res);
2414 /* Clean the Legacy or Link interrupt last */
2415 if (adapter->linkvec) /* we are doing MSIX */
2416 rid = adapter->linkvec + 1;
2417 else
2418 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2420 if (adapter->tag != NULL) {
2421 bus_teardown_intr(dev, adapter->res, adapter->tag);
2422 adapter->tag = NULL;
2424 if (adapter->res != NULL)
2425 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2427 mem:
2428 if (adapter->msix)
2429 pci_release_msi(dev);
2431 if (adapter->msix_mem != NULL)
2432 bus_release_resource(dev, SYS_RES_MEMORY,
2433 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2435 if (adapter->pci_mem != NULL)
2436 bus_release_resource(dev, SYS_RES_MEMORY,
2437 PCIR_BAR(0), adapter->pci_mem);
2441 /*
2442 * Setup Either MSI/X or MSI
2443 */
2444 static int
2445 igb_setup_msix(struct adapter *adapter)
2447 device_t dev = adapter->dev;
2448 int rid, want, queues, msgs;
2450 /* tuneable override */
2451 if (igb_enable_msix == 0)
2452 goto msi;
2454 /* First try MSI/X */
2455 rid = PCIR_BAR(IGB_MSIX_BAR);
2456 adapter->msix_mem = bus_alloc_resource_any(dev,
2457 SYS_RES_MEMORY, &rid, RF_ACTIVE);
2458 if (!adapter->msix_mem) {
2459 /* May not be enabled */
2460 device_printf(adapter->dev,
2461 "Unable to map MSIX table \n");
2462 goto msi;
2465 msgs = pci_msix_count(dev);
2466 if (msgs == 0) { /* system has msix disabled */
2467 bus_release_resource(dev, SYS_RES_MEMORY,
2468 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2469 adapter->msix_mem = NULL;
2470 goto msi;
2473 /* Figure out a reasonable auto config value */
2474 queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2476 /* Can have max of 4 queues on 82575 */
2477 if (adapter->hw.mac.type == e1000_82575) {
2478 if (queues > 4)
2479 queues = 4;
2480 if (igb_num_queues > 4)
2481 igb_num_queues = 4;
2484 if (igb_num_queues == 0)
2485 igb_num_queues = queues;
2487 /*
2488 ** One vector (RX/TX pair) per queue
2489 ** plus an additional for Link interrupt
2490 */
2491 want = igb_num_queues + 1;
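/*
 * Illustrative sizing: on a 4-core machine where the device reports
 * 10 MSI-X messages, queues = min(ncpus, msgs - 1) = 4, so with the
 * igb_num_queues tunable left at 0 this requests want = 4 + 1 = 5
 * vectors (one per RX/TX queue pair plus the link vector), and
 * pci_alloc_msix() is then asked for 5 messages.
 */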
2492 if (msgs >= want)
2493 msgs = want;
2494 else {
2495 device_printf(adapter->dev,
2496 "MSIX Configuration Problem, "
2497 "%d vectors configured, but %d queues wanted!\n",
2498 msgs, want);
2499 return (ENXIO);
2501 if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2502 device_printf(adapter->dev,
2503 "Using MSIX interrupts with %d vectors\n", msgs);
2504 adapter->num_queues = igb_num_queues;
2505 return (msgs);
2507 msi:
2508 msgs = pci_msi_count(dev);
2509 if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2510 device_printf(adapter->dev,"Using MSI interrupt\n");
2511 return (msgs);
2514 /*********************************************************************
2516 * Set up a fresh starting state
2518 **********************************************************************/
2519 static void
2520 igb_reset(struct adapter *adapter)
2522 device_t dev = adapter->dev;
2523 struct e1000_hw *hw = &adapter->hw;
2524 struct e1000_fc_info *fc = &hw->fc;
2525 struct ifnet *ifp = adapter->ifp;
2526 u32 pba = 0;
2527 u16 hwm;
2529 INIT_DEBUGOUT("igb_reset: begin");
2531 /* Let the firmware know the OS is in control */
2532 igb_get_hw_control(adapter);
2534 /*
2535 * Packet Buffer Allocation (PBA)
2536 * Writing PBA sets the receive portion of the buffer;
2537 * the remainder is used for the transmit buffer.
2538 */
2539 switch (hw->mac.type) {
2540 case e1000_82575:
2541 pba = E1000_PBA_32K;
2542 break;
2543 case e1000_82576:
2544 pba = E1000_PBA_64K;
2545 break;
2546 case e1000_82580:
2547 pba = E1000_PBA_35K;
2548 default:
2549 break;
2552 /* Special needs in case of Jumbo frames */
2553 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2554 u32 tx_space, min_tx, min_rx;
2555 pba = E1000_READ_REG(hw, E1000_PBA);
2556 tx_space = pba >> 16;
2557 pba &= 0xffff;
2558 min_tx = (adapter->max_frame_size +
2559 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2560 min_tx = roundup2(min_tx, 1024);
2561 min_tx >>= 10;
2562 min_rx = adapter->max_frame_size;
2563 min_rx = roundup2(min_rx, 1024);
2564 min_rx >>= 10;
2565 if (tx_space < min_tx &&
2566 ((min_tx - tx_space) < pba)) {
2567 pba = pba - (min_tx - tx_space);
2568 /*
2569 * if short on rx space, rx wins
2570 * and must trump tx adjustment
2571 */
2572 if (pba < min_rx)
2573 pba = min_rx;
2575 E1000_WRITE_REG(hw, E1000_PBA, pba);
2578 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2580 /*
2581 * These parameters control the automatic generation (Tx) and
2582 * response (Rx) to Ethernet PAUSE frames.
2583 * - High water mark should allow for at least two frames to be
2584 * received after sending an XOFF.
2585 * - Low water mark works best when it is very near the high water mark.
2586 * This allows the receiver to restart by sending XON when it has
2587 * drained a bit.
2588 */
2589 hwm = min(((pba << 10) * 9 / 10),
2590 ((pba << 10) - 2 * adapter->max_frame_size));
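/*
 * Worked example (illustrative, assuming a 64 KB PBA, i.e. pba = 64,
 * and a 1522 byte max frame): pba << 10 = 65536, so
 * hwm = min(65536 * 9 / 10, 65536 - 2 * 1522) = min(58982, 62492) = 58982.
 * On an 82576 this is rounded down to 16-byte granularity, giving
 * high_water = 58976 and low_water = 58960.
 */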
2592 if (hw->mac.type < e1000_82576) {
2593 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
2594 fc->low_water = fc->high_water - 8;
2595 } else {
2596 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
2597 fc->low_water = fc->high_water - 16;
2600 fc->pause_time = IGB_FC_PAUSE_TIME;
2601 fc->send_xon = TRUE;
2603 /* Set Flow control, use the tunable location if sane */
2604 if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2605 fc->requested_mode = igb_fc_setting;
2606 else
2607 fc->requested_mode = e1000_fc_none;
2609 fc->current_mode = fc->requested_mode;
2611 /* Issue a global reset */
2612 e1000_reset_hw(hw);
2613 E1000_WRITE_REG(hw, E1000_WUC, 0);
2615 if (e1000_init_hw(hw) < 0)
2616 device_printf(dev, "Hardware Initialization Failed\n");
2618 if (hw->mac.type == e1000_82580) {
2619 u32 reg;
2621 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2622 /*
2623 * 0x80000000 - enable DMA COAL
2624 * 0x10000000 - use L0s as low power
2625 * 0x20000000 - use L1 as low power
2626 * X << 16 - exit dma coal when rx data exceeds X kB
2627 * Y - upper limit to stay in dma coal in units of 32usecs
2628 */
2629 E1000_WRITE_REG(hw, E1000_DMACR,
2630 0xA0000006 | ((hwm << 6) & 0x00FF0000));
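/*
 * Decoding the constant against the field description above:
 * 0xA0000006 = 0x80000000 (enable DMA coalescing) | 0x20000000 (use L1
 * as the low power state) | 6 (stay in coalescing for at most
 * 6 * 32 usecs), while (hwm << 6) & 0x00FF0000 places hwm / 1024, i.e.
 * the watermark expressed in kB (truncated to 8 bits), into the X
 * field at bits 23:16.
 */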
2632 /* set hwm to PBA - 2 * max frame size */
2633 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2634 /*
2635 * This sets the time to wait before requesting transition to
2636 * low power state to the number of usecs needed to receive one
2637 * 512 byte frame at gigabit line rate.
2638 */
2639 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2641 /* free space in tx packet buffer to wake from DMA coal */
2642 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2643 (20480 - (2 * adapter->max_frame_size)) >> 6);
2645 /* make low power state decision controlled by DMA coal */
2646 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2647 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2648 reg | E1000_PCIEMISC_LX_DECISION);
2651 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2652 e1000_get_phy_info(hw);
2653 e1000_check_for_link(hw);
2654 return;
2657 /*********************************************************************
2659 * Setup networking device structure and register an interface.
2661 **********************************************************************/
2662 static void
2663 igb_setup_interface(device_t dev, struct adapter *adapter)
2665 struct ifnet *ifp;
2667 INIT_DEBUGOUT("igb_setup_interface: begin");
2669 ifp = adapter->ifp = &adapter->arpcom.ac_if;
2670 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2671 ifp->if_mtu = ETHERMTU;
2672 ifp->if_init = igb_init;
2673 ifp->if_softc = adapter;
2674 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2675 ifp->if_ioctl = igb_ioctl;
2676 ifp->if_start = igb_start;
2677 #if __FreeBSD_version >= 800000
2678 ifp->if_transmit = igb_mq_start;
2679 ifp->if_qflush = igb_qflush;
2680 #endif
2681 ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2682 ifq_set_ready(&ifp->if_snd);
2684 ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2686 ifp->if_capabilities = ifp->if_capenable = 0;
2688 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2689 #ifdef NET_TSO
2690 ifp->if_capabilities |= IFCAP_TSO4;
2691 #endif
2692 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2693 #ifdef NET_LRO
2694 if (igb_header_split)
2695 ifp->if_capabilities |= IFCAP_LRO;
2696 #endif
2698 ifp->if_capenable = ifp->if_capabilities;
2699 #ifdef DEVICE_POLLING
2700 ifp->if_capabilities |= IFCAP_POLLING;
2701 #endif
2703 /*
2704 * Tell the upper layer(s) we support long frames.
2705 */
2706 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2707 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2708 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2710 /*
2711 * Specify the media types supported by this adapter and register
2712 * callbacks to update media and link information
2713 */
2714 ifmedia_init(&adapter->media, IFM_IMASK,
2715 igb_media_change, igb_media_status);
2716 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2717 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2718 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2719 0, NULL);
2720 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2721 } else {
2722 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2723 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2724 0, NULL);
2725 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2726 0, NULL);
2727 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2728 0, NULL);
2729 if (adapter->hw.phy.type != e1000_phy_ife) {
2730 ifmedia_add(&adapter->media,
2731 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2732 ifmedia_add(&adapter->media,
2733 IFM_ETHER | IFM_1000_T, 0, NULL);
2736 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2737 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2741 /*
2742 * Manage DMA'able memory.
2743 */
2744 static void
2745 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2747 if (error)
2748 return;
2749 *(bus_addr_t *) arg = segs[0].ds_addr;
2752 static int
2753 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2754 struct igb_dma_alloc *dma, int mapflags)
2756 int error;
2758 error = bus_dma_tag_create(NULL, /* parent */
2759 IGB_DBA_ALIGN, 0, /* alignment, bounds */
2760 BUS_SPACE_MAXADDR, /* lowaddr */
2761 BUS_SPACE_MAXADDR, /* highaddr */
2762 NULL, NULL, /* filter, filterarg */
2763 size, /* maxsize */
2764 1, /* nsegments */
2765 size, /* maxsegsize */
2766 0, /* flags */
2767 &dma->dma_tag);
2768 if (error) {
2769 device_printf(adapter->dev,
2770 "%s: bus_dma_tag_create failed: %d\n",
2771 __func__, error);
2772 goto fail_0;
2775 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2776 BUS_DMA_NOWAIT, &dma->dma_map);
2777 if (error) {
2778 device_printf(adapter->dev,
2779 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2780 __func__, (uintmax_t)size, error);
2781 goto fail_2;
2784 dma->dma_paddr = 0;
2785 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2786 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2787 if (error || dma->dma_paddr == 0) {
2788 device_printf(adapter->dev,
2789 "%s: bus_dmamap_load failed: %d\n",
2790 __func__, error);
2791 goto fail_3;
2794 return (0);
2796 fail_3:
2797 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2798 fail_2:
2799 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2800 bus_dma_tag_destroy(dma->dma_tag);
2801 fail_0:
2802 dma->dma_map = NULL;
2803 dma->dma_tag = NULL;
2805 return (error);
2808 static void
2809 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2811 if (dma->dma_tag == NULL)
2812 return;
2813 if (dma->dma_map != NULL) {
2814 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2815 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2816 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2817 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2818 dma->dma_map = NULL;
2820 bus_dma_tag_destroy(dma->dma_tag);
2821 dma->dma_tag = NULL;
2825 /*********************************************************************
2827 * Allocate memory for the transmit and receive rings, and then
2828 * the descriptors associated with each, called only once at attach.
2830 **********************************************************************/
2831 static int
2832 igb_allocate_queues(struct adapter *adapter)
2834 device_t dev = adapter->dev;
2835 struct igb_queue *que = NULL;
2836 struct tx_ring *txr = NULL;
2837 struct rx_ring *rxr = NULL;
2838 int rsize, tsize, error = E1000_SUCCESS;
2839 int txconf = 0, rxconf = 0;
2841 /* First allocate the top level queue structs */
2842 if (!(adapter->queues =
2843 (struct igb_queue *) kmalloc(sizeof(struct igb_queue) *
2844 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2845 device_printf(dev, "Unable to allocate queue memory\n");
2846 error = ENOMEM;
2847 goto fail;
2850 /* Next allocate the TX ring struct memory */
2851 if (!(adapter->tx_rings =
2852 (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2853 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2854 device_printf(dev, "Unable to allocate TX ring memory\n");
2855 error = ENOMEM;
2856 goto tx_fail;
2859 /* Now allocate the RX */
2860 if (!(adapter->rx_rings =
2861 (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2862 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2863 device_printf(dev, "Unable to allocate RX ring memory\n");
2864 error = ENOMEM;
2865 goto rx_fail;
2868 tsize = roundup2(adapter->num_tx_desc *
2869 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2870 /*
2871 * Now set up the TX queues; txconf is needed to handle the
2872 * possibility that things fail midcourse and we need to
2873 * undo the memory gracefully.
2874 */
2875 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2876 /* Set up some basics */
2877 txr = &adapter->tx_rings[i];
2878 txr->adapter = adapter;
2879 txr->me = i;
2881 /* Initialize the TX lock */
2882 ksnprintf(txr->spin_name, sizeof(txr->spin_name), "%s:tx(%d)",
2883 device_get_nameunit(dev), txr->me);
2885 spin_init(&txr->tx_spin);
2887 if (igb_dma_malloc(adapter, tsize,
2888 &txr->txdma, BUS_DMA_NOWAIT)) {
2889 device_printf(dev,
2890 "Unable to allocate TX Descriptor memory\n");
2891 error = ENOMEM;
2892 goto err_tx_desc;
2894 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2895 bzero((void *)txr->tx_base, tsize);
2897 /* Now allocate transmit buffers for the ring */
2898 if (igb_allocate_transmit_buffers(txr)) {
2899 device_printf(dev,
2900 "Critical Failure setting up transmit buffers\n");
2901 error = ENOMEM;
2902 goto err_tx_desc;
2904 #if __FreeBSD_version >= 800000
2905 /* Allocate a buf ring */
2906 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2907 M_WAITOK, &txr->tx_mtx);
2908 #endif
2911 /*
2912 * Next the RX queues...
2913 */
2914 rsize = roundup2(adapter->num_rx_desc *
2915 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2916 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2917 rxr = &adapter->rx_rings[i];
2918 rxr->adapter = adapter;
2919 rxr->me = i;
2921 /* Initialize the RX lock */
2922 ksnprintf(rxr->spin_name, sizeof(rxr->spin_name), "%s:rx(%d)",
2923 device_get_nameunit(dev), rxr->me);
2925 spin_init(&rxr->rx_spin);
2927 if (igb_dma_malloc(adapter, rsize,
2928 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2929 device_printf(dev,
2930 "Unable to allocate RxDescriptor memory\n");
2931 error = ENOMEM;
2932 goto err_rx_desc;
2934 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2935 bzero((void *)rxr->rx_base, rsize);
2937 /* Allocate receive buffers for the ring*/
2938 if (igb_allocate_receive_buffers(rxr)) {
2939 device_printf(dev,
2940 "Critical Failure setting up receive buffers\n");
2941 error = ENOMEM;
2942 goto err_rx_desc;
2946 /*
2947 ** Finally set up the queue holding structs
2948 */
2949 for (int i = 0; i < adapter->num_queues; i++) {
2950 que = &adapter->queues[i];
2951 que->adapter = adapter;
2952 que->txr = &adapter->tx_rings[i];
2953 que->rxr = &adapter->rx_rings[i];
2956 return (0);
2958 err_rx_desc:
2959 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2960 igb_dma_free(adapter, &rxr->rxdma);
2961 err_tx_desc:
2962 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2963 igb_dma_free(adapter, &txr->txdma);
2964 kfree(adapter->rx_rings, M_DEVBUF);
2965 rx_fail:
2966 #if __FreeBSD_version >= 800000
2967 buf_ring_free(txr->br, M_DEVBUF);
2968 #endif
2969 kfree(adapter->tx_rings, M_DEVBUF);
2970 tx_fail:
2971 kfree(adapter->queues, M_DEVBUF);
2972 fail:
2973 return (error);
2976 /*********************************************************************
2978 * Allocate memory for tx_buffer structures. The tx_buffer stores all
2979 * the information needed to transmit a packet on the wire. This is
2980 * called only once at attach; setup is done on every reset.
2982 **********************************************************************/
2983 static int
2984 igb_allocate_transmit_buffers(struct tx_ring *txr)
2986 struct adapter *adapter = txr->adapter;
2987 device_t dev = adapter->dev;
2988 struct igb_tx_buffer *txbuf;
2989 int error, i;
2991 /*
2992 * Setup DMA descriptor areas.
2993 */
2994 if ((error = bus_dma_tag_create(NULL,
2995 1, 0, /* alignment, bounds */
2996 BUS_SPACE_MAXADDR, /* lowaddr */
2997 BUS_SPACE_MAXADDR, /* highaddr */
2998 NULL, NULL, /* filter, filterarg */
2999 IGB_TSO_SIZE, /* maxsize */
3000 IGB_MAX_SCATTER, /* nsegments */
3001 PAGE_SIZE, /* maxsegsize */
3002 0, /* flags */
3003 &txr->txtag))) {
3004 device_printf(dev,"Unable to allocate TX DMA tag\n");
3005 goto fail;
3008 if (!(txr->tx_buffers =
3009 (struct igb_tx_buffer *) kmalloc(sizeof(struct igb_tx_buffer) *
3010 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3011 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3012 error = ENOMEM;
3013 goto fail;
3016 /* Create the descriptor buffer dma maps */
3017 txbuf = txr->tx_buffers;
3018 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3019 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3020 if (error != 0) {
3021 device_printf(dev, "Unable to create TX DMA map\n");
3022 goto fail;
3026 return 0;
3027 fail:
3028 /* We free all, it handles case where we are in the middle */
3029 igb_free_transmit_structures(adapter);
3030 return (error);
3033 /*********************************************************************
3035 * Initialize a transmit ring.
3037 **********************************************************************/
3038 static void
3039 igb_setup_transmit_ring(struct tx_ring *txr)
3041 struct adapter *adapter = txr->adapter;
3042 struct igb_tx_buffer *txbuf;
3043 int i;
3045 /* Clear the old descriptor contents */
3046 IGB_TX_LOCK(txr);
3047 bzero((void *)txr->tx_base,
3048 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3049 /* Reset indices */
3050 txr->next_avail_desc = 0;
3051 txr->next_to_clean = 0;
3053 /* Free any existing tx buffers. */
3054 txbuf = txr->tx_buffers;
3055 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3056 if (txbuf->m_head != NULL) {
3057 bus_dmamap_sync(txr->txtag, txbuf->map,
3058 BUS_DMASYNC_POSTWRITE);
3059 bus_dmamap_unload(txr->txtag, txbuf->map);
3060 m_freem(txbuf->m_head);
3061 txbuf->m_head = NULL;
3063 /* clear the watch index */
3064 txbuf->next_eop = -1;
3067 /* Set number of descriptors available */
3068 txr->tx_avail = adapter->num_tx_desc;
3070 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3071 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3072 IGB_TX_UNLOCK(txr);
3075 /*********************************************************************
3077 * Initialize all transmit rings.
3079 **********************************************************************/
3080 static void
3081 igb_setup_transmit_structures(struct adapter *adapter)
3083 struct tx_ring *txr = adapter->tx_rings;
3085 for (int i = 0; i < adapter->num_queues; i++, txr++)
3086 igb_setup_transmit_ring(txr);
3088 return;
3091 /*********************************************************************
3093 * Enable transmit unit.
3095 **********************************************************************/
3096 static void
3097 igb_initialize_transmit_units(struct adapter *adapter)
3099 struct tx_ring *txr = adapter->tx_rings;
3100 struct e1000_hw *hw = &adapter->hw;
3101 u32 tctl, txdctl;
3103 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3105 /* Setup the Tx Descriptor Rings */
3106 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3107 u64 bus_addr = txr->txdma.dma_paddr;
3109 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3110 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3111 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3112 (uint32_t)(bus_addr >> 32));
3113 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3114 (uint32_t)bus_addr);
3116 /* Setup the HW Tx Head and Tail descriptor pointers */
3117 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3118 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3120 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3121 E1000_READ_REG(hw, E1000_TDBAL(i)),
3122 E1000_READ_REG(hw, E1000_TDLEN(i)));
3124 txr->watchdog_check = FALSE;
3126 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3127 txdctl |= IGB_TX_PTHRESH;
3128 txdctl |= IGB_TX_HTHRESH << 8;
3129 txdctl |= IGB_TX_WTHRESH << 16;
3130 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3131 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3134 /* Program the Transmit Control Register */
3135 tctl = E1000_READ_REG(hw, E1000_TCTL);
3136 tctl &= ~E1000_TCTL_CT;
3137 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3138 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3140 e1000_config_collision_dist(hw);
3142 /* This write will effectively turn on the transmit unit. */
3143 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3146 /*********************************************************************
3148 * Free all transmit rings.
3150 **********************************************************************/
3151 static void
3152 igb_free_transmit_structures(struct adapter *adapter)
3154 struct tx_ring *txr = adapter->tx_rings;
3156 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3157 IGB_TX_LOCK(txr);
3158 igb_free_transmit_buffers(txr);
3159 igb_dma_free(adapter, &txr->txdma);
3160 IGB_TX_UNLOCK(txr);
3161 IGB_TX_LOCK_DESTROY(txr);
3163 kfree(adapter->tx_rings, M_DEVBUF);
3166 /*********************************************************************
3168 * Free transmit ring related data structures.
3170 **********************************************************************/
3171 static void
3172 igb_free_transmit_buffers(struct tx_ring *txr)
3174 struct adapter *adapter = txr->adapter;
3175 struct igb_tx_buffer *tx_buffer;
3176 int i;
3178 INIT_DEBUGOUT("free_transmit_ring: begin");
3180 if (txr->tx_buffers == NULL)
3181 return;
3183 tx_buffer = txr->tx_buffers;
3184 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3185 if (tx_buffer->m_head != NULL) {
3186 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3187 BUS_DMASYNC_POSTWRITE);
3188 bus_dmamap_unload(txr->txtag,
3189 tx_buffer->map);
3190 m_freem(tx_buffer->m_head);
3191 tx_buffer->m_head = NULL;
3192 if (tx_buffer->map != NULL) {
3193 bus_dmamap_destroy(txr->txtag,
3194 tx_buffer->map);
3195 tx_buffer->map = NULL;
3197 } else if (tx_buffer->map != NULL) {
3198 bus_dmamap_unload(txr->txtag,
3199 tx_buffer->map);
3200 bus_dmamap_destroy(txr->txtag,
3201 tx_buffer->map);
3202 tx_buffer->map = NULL;
3205 #if __FreeBSD_version >= 800000
3206 if (txr->br != NULL)
3207 buf_ring_free(txr->br, M_DEVBUF);
3208 #endif
3209 if (txr->tx_buffers != NULL) {
3210 kfree(txr->tx_buffers, M_DEVBUF);
3211 txr->tx_buffers = NULL;
3213 if (txr->txtag != NULL) {
3214 bus_dma_tag_destroy(txr->txtag);
3215 txr->txtag = NULL;
3217 return;
3220 /**********************************************************************
3222 * Setup work for hardware segmentation offload (TSO)
3224 **********************************************************************/
3225 #ifdef NET_TSO
3226 static boolean_t
3227 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3229 struct adapter *adapter = txr->adapter;
3230 struct e1000_adv_tx_context_desc *TXD;
3231 struct igb_tx_buffer *tx_buffer;
3232 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3233 u32 mss_l4len_idx = 0;
3234 u16 vtag = 0;
3235 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3236 struct ether_vlan_header *eh;
3237 struct ip *ip;
3238 struct tcphdr *th;
3241 /*
3242 * Determine where frame payload starts.
3243 * Jump over vlan headers if already present
3244 */
3245 eh = mtod(mp, struct ether_vlan_header *);
3246 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3247 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3248 else
3249 ehdrlen = ETHER_HDR_LEN;
3251 /* Ensure we have at least the IP+TCP header in the first mbuf. */
3252 if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3253 return FALSE;
3255 /* Only supports IPV4 for now */
3256 ctxd = txr->next_avail_desc;
3257 tx_buffer = &txr->tx_buffers[ctxd];
3258 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3260 ip = (struct ip *)(mp->m_data + ehdrlen);
3261 if (ip->ip_p != IPPROTO_TCP)
3262 return FALSE; /* 0 */
3263 ip->ip_sum = 0;
3264 ip_hlen = ip->ip_hl << 2;
3265 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3266 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3267 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3268 tcp_hlen = th->th_off << 2;
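/*
 * For TSO the TCP checksum field is pre-seeded with the pseudo-header
 * sum over the addresses and protocol only (note that no length is
 * passed to in_pseudo() above); the hardware folds in the per-segment
 * payload length as it splits the burst into MSS-sized frames.
 */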
3269 /*
3270 * Calculate the header length; this is used
3271 * in the transmit desc in igb_xmit.
3272 */
3273 *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3275 /* VLAN MACLEN IPLEN */
3276 if (mp->m_flags & M_VLANTAG) {
3277 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3278 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3281 vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3282 vlan_macip_lens |= ip_hlen;
3283 TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3285 /* ADV DTYPE TUCMD */
3286 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3287 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3288 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3289 TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3291 /* MSS L4LEN IDX */
3292 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3293 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3294 /* 82575 needs the queue index added */
3295 if (adapter->hw.mac.type == e1000_82575)
3296 mss_l4len_idx |= txr->me << 4;
3297 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3299 TXD->seqnum_seed = htole32(0);
3300 tx_buffer->m_head = NULL;
3301 tx_buffer->next_eop = -1;
3303 if (++ctxd == adapter->num_tx_desc)
3304 ctxd = 0;
3306 txr->tx_avail--;
3307 txr->next_avail_desc = ctxd;
3308 return TRUE;
3310 #endif
3312 /*********************************************************************
3314 * Context Descriptor setup for VLAN or CSUM
3316 **********************************************************************/
3318 static bool
3319 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3321 struct adapter *adapter = txr->adapter;
3322 struct e1000_adv_tx_context_desc *TXD;
3323 struct igb_tx_buffer *tx_buffer;
3324 u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3325 struct ether_vlan_header *eh;
3326 struct ip *ip = NULL;
3327 struct ip6_hdr *ip6;
3328 int ehdrlen, ctxd, ip_hlen = 0;
3329 u16 etype, vtag = 0;
3330 u8 ipproto = 0;
3331 bool offload = TRUE;
3333 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3334 offload = FALSE;
3336 vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3337 ctxd = txr->next_avail_desc;
3338 tx_buffer = &txr->tx_buffers[ctxd];
3339 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3341 /*
3342 ** In advanced descriptors the vlan tag must
3343 ** be placed into the context descriptor, thus
3344 ** we need to be here just for that setup.
3345 */
3346 if (mp->m_flags & M_VLANTAG) {
3347 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3348 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3349 } else if (offload == FALSE)
3350 return FALSE;
3352 /*
3353 * Determine where frame payload starts.
3354 * Jump over vlan headers if already present,
3355 * helpful for QinQ too.
3356 */
3357 eh = mtod(mp, struct ether_vlan_header *);
3358 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3359 etype = ntohs(eh->evl_proto);
3360 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3361 } else {
3362 etype = ntohs(eh->evl_encap_proto);
3363 ehdrlen = ETHER_HDR_LEN;
3366 /* Set the ether header length */
3367 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
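/*
 * vlan_macip_lens packs three fields: the IP header length in the low
 * bits (OR'ed in below), the L2 header length at
 * E1000_ADVTXD_MACLEN_SHIFT and the 802.1Q tag at
 * E1000_ADVTXD_VLAN_SHIFT.  Per the advanced context descriptor layout
 * these are commonly bits 8:0, 15:9 and 31:16 respectively (stated for
 * reference; the exact field widths are not defined in this file).
 */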
3369 switch (etype) {
3370 case ETHERTYPE_IP:
3371 ip = (struct ip *)(mp->m_data + ehdrlen);
3372 ip_hlen = ip->ip_hl << 2;
3373 if (mp->m_len < ehdrlen + ip_hlen) {
3374 offload = FALSE;
3375 break;
3377 ipproto = ip->ip_p;
3378 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3379 break;
3380 case ETHERTYPE_IPV6:
3381 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3382 ip_hlen = sizeof(struct ip6_hdr);
3383 if (mp->m_len < ehdrlen + ip_hlen)
3384 return (FALSE);
3385 ipproto = ip6->ip6_nxt;
3386 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3387 break;
3388 default:
3389 offload = FALSE;
3390 break;
3393 vlan_macip_lens |= ip_hlen;
3394 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3396 switch (ipproto) {
3397 case IPPROTO_TCP:
3398 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3399 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3400 break;
3401 case IPPROTO_UDP:
3402 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3403 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3404 break;
3405 #if __FreeBSD_version >= 800000
3406 case IPPROTO_SCTP:
3407 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3408 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3409 break;
3410 #endif
3411 default:
3412 offload = FALSE;
3413 break;
3416 /* 82575 needs the queue index added */
3417 if (adapter->hw.mac.type == e1000_82575)
3418 mss_l4len_idx = txr->me << 4;
3420 /* Now copy bits into descriptor */
3421 TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3422 TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3423 TXD->seqnum_seed = htole32(0);
3424 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3426 tx_buffer->m_head = NULL;
3427 tx_buffer->next_eop = -1;
3429 /* We've consumed the first desc, adjust counters */
3430 if (++ctxd == adapter->num_tx_desc)
3431 ctxd = 0;
3432 txr->next_avail_desc = ctxd;
3433 --txr->tx_avail;
3435 return (offload);
3439 /**********************************************************************
3441 * Examine each tx_buffer in the used queue. If the hardware is done
3442 * processing the packet then free associated resources. The
3443 * tx_buffer is put back on the free queue.
3445 * A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3446 **********************************************************************/
3447 static bool
3448 igb_txeof(struct tx_ring *txr)
3450 struct adapter *adapter = txr->adapter;
3451 int first, last, done;
3452 struct igb_tx_buffer *tx_buffer;
3453 struct e1000_tx_desc *tx_desc, *eop_desc;
3454 struct ifnet *ifp = adapter->ifp;
3456 IGB_TX_LOCK_ASSERT(txr);
3458 if (txr->tx_avail == adapter->num_tx_desc)
3459 return FALSE;
3461 first = txr->next_to_clean;
3462 tx_desc = &txr->tx_base[first];
3463 tx_buffer = &txr->tx_buffers[first];
3464 last = tx_buffer->next_eop;
3465 eop_desc = &txr->tx_base[last];
3467 /*
3468 * What this does is get the index of the
3469 * first descriptor AFTER the EOP of the
3470 * first packet, that way we can do the
3471 * simple comparison on the inner while loop.
3472 */
3473 if (++last == adapter->num_tx_desc)
3474 last = 0;
3475 done = last;
3477 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3478 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3480 while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3481 /* We clean the range of the packet */
3482 while (first != done) {
3483 tx_desc->upper.data = 0;
3484 tx_desc->lower.data = 0;
3485 tx_desc->buffer_addr = 0;
3486 ++txr->tx_avail;
3488 if (tx_buffer->m_head) {
3489 txr->bytes +=
3490 tx_buffer->m_head->m_pkthdr.len;
3491 bus_dmamap_sync(txr->txtag,
3492 tx_buffer->map,
3493 BUS_DMASYNC_POSTWRITE);
3494 bus_dmamap_unload(txr->txtag,
3495 tx_buffer->map);
3497 m_freem(tx_buffer->m_head);
3498 tx_buffer->m_head = NULL;
3500 tx_buffer->next_eop = -1;
3501 txr->watchdog_time = ticks;
3503 if (++first == adapter->num_tx_desc)
3504 first = 0;
3506 tx_buffer = &txr->tx_buffers[first];
3507 tx_desc = &txr->tx_base[first];
3509 ++txr->packets;
3510 ++ifp->if_opackets;
3511 /* See if we can continue to the next packet */
3512 last = tx_buffer->next_eop;
3513 if (last != -1) {
3514 eop_desc = &txr->tx_base[last];
3515 /* Get new done point */
3516 if (++last == adapter->num_tx_desc) last = 0;
3517 done = last;
3518 } else
3519 break;
3521 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3522 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3524 txr->next_to_clean = first;
3526 /*
3527 * If we have enough room, clear IFF_OACTIVE
3528 * to tell the stack that it is OK to send packets.
3529 */
3530 if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3531 ifp->if_flags &= ~IFF_OACTIVE;
3532 /* All clean, turn off the watchdog */
3533 if (txr->tx_avail == adapter->num_tx_desc) {
3534 txr->watchdog_check = FALSE;
3535 return FALSE;
3539 return (TRUE);
3543 /*********************************************************************
3545 * Setup descriptor buffer(s) from system mbuf buffer pools.
3546 * i - designates the ring index
3547 * clean - tells the function whether to update
3548 * the header, the packet buffer, or both.
3550 **********************************************************************/
3551 static int
3552 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3554 struct adapter *adapter = rxr->adapter;
3555 struct igb_rx_buf *rxbuf;
3556 struct mbuf *mh, *mp;
3557 bus_dma_segment_t hseg[1];
3558 bus_dma_segment_t pseg[1];
3559 bus_dmamap_t map;
3560 int nsegs, error;
3563 rxbuf = &rxr->rx_buffers[i];
3564 mh = mp = NULL;
3565 if ((clean & IGB_CLEAN_HEADER) != 0) {
3566 mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3567 if (mh == NULL) {
3568 adapter->mbuf_header_failed++;
3569 return (ENOBUFS);
3571 mh->m_pkthdr.len = mh->m_len = MHLEN;
3572 /*
3573 * Because IGB_HDR_BUF size is less than MHLEN
3574 * and we configure the controller to split headers,
3575 * we can align the mbuf on an ETHER_ALIGN boundary.
3576 */
3577 m_adj(mh, ETHER_ALIGN);
3578 error = bus_dmamap_load_mbuf_segment(rxr->rx_htag,
3579 rxr->rx_hspare_map, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3580 if (error != 0) {
3581 m_freem(mh);
3582 return (error);
3584 mh->m_flags &= ~M_PKTHDR;
3586 if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3587 mp = m_getl(adapter->rx_mbuf_sz,
3588 MB_DONTWAIT, MT_DATA, M_PKTHDR, NULL);
3589 #if 0
3590 mp = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR,
3591 adapter->rx_mbuf_sz);
3592 #endif
3593 if (mp == NULL) {
3594 if (mh != NULL) {
3595 adapter->mbuf_packet_failed++;
3596 bus_dmamap_unload(rxr->rx_htag,
3597 rxbuf->head_map);
3598 mh->m_flags |= M_PKTHDR;
3599 m_freem(mh);
3601 return (ENOBUFS);
3603 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3604 error = bus_dmamap_load_mbuf_segment(rxr->rx_ptag,
3605 rxr->rx_pspare_map, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3606 if (error != 0) {
3607 if (mh != NULL) {
3608 bus_dmamap_unload(rxr->rx_htag,
3609 rxbuf->head_map);
3610 mh->m_flags |= M_PKTHDR;
3611 m_freem(mh);
3613 m_freem(mp);
3614 return (error);
3616 mp->m_flags &= ~M_PKTHDR;
3619 /* Loading new DMA maps complete, unload maps for received buffers. */
3620 if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
3621 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3622 BUS_DMASYNC_POSTREAD);
3623 bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3625 if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
3626 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3627 BUS_DMASYNC_POSTREAD);
3628 bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3631 /* Reflect loaded dmamaps. */
3632 if ((clean & IGB_CLEAN_HEADER) != 0) {
3633 map = rxbuf->head_map;
3634 rxbuf->head_map = rxr->rx_hspare_map;
3635 rxr->rx_hspare_map = map;
3636 rxbuf->m_head = mh;
3637 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3638 BUS_DMASYNC_PREREAD);
3639 rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3641 if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3642 map = rxbuf->pack_map;
3643 rxbuf->pack_map = rxr->rx_pspare_map;
3644 rxr->rx_pspare_map = map;
3645 rxbuf->m_pack = mp;
3646 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3647 BUS_DMASYNC_PREREAD);
3648 rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3651 return (0);
3654 /*********************************************************************
3656 * Allocate memory for rx_buffer structures. Since we use one
3657 * rx_buffer per received packet, the maximum number of rx_buffers
3658 * that we'll need is equal to the number of receive descriptors
3659 * that we've allocated.
3661 **********************************************************************/
3662 static int
3663 igb_allocate_receive_buffers(struct rx_ring *rxr)
3665 struct adapter *adapter = rxr->adapter;
3666 device_t dev = adapter->dev;
3667 struct igb_rx_buf *rxbuf;
3668 int i, bsize, error;
3670 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3671 if (!(rxr->rx_buffers =
3672 (struct igb_rx_buf *) kmalloc(bsize,
3673 M_DEVBUF, M_NOWAIT | M_ZERO))) {
3674 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3675 error = ENOMEM;
3676 goto fail;
3679 if ((error = bus_dma_tag_create(NULL,
3680 1, 0, /* alignment, bounds */
3681 BUS_SPACE_MAXADDR, /* lowaddr */
3682 BUS_SPACE_MAXADDR, /* highaddr */
3683 NULL, NULL, /* filter, filterarg */
3684 MSIZE, /* maxsize */
3685 1, /* nsegments */
3686 MSIZE, /* maxsegsize */
3687 0, /* flags */
3688 &rxr->rx_htag))) {
3689 device_printf(dev, "Unable to create RX DMA tag\n");
3690 goto fail;
3693 if ((error = bus_dma_tag_create(NULL,
3694 1, 0, /* alignment, bounds */
3695 BUS_SPACE_MAXADDR, /* lowaddr */
3696 BUS_SPACE_MAXADDR, /* highaddr */
3697 NULL, NULL, /* filter, filterarg */
3698 MJUMPAGESIZE, /* maxsize */
3699 1, /* nsegments */
3700 MJUMPAGESIZE, /* maxsegsize */
3701 0, /* flags */
3702 &rxr->rx_ptag))) {
3703 device_printf(dev, "Unable to create RX payload DMA tag\n");
3704 goto fail;
3707 /* Create the spare maps (used by getbuf) */
3708 error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
3709 &rxr->rx_hspare_map);
3710 if (error) {
3711 device_printf(dev,
3712 "%s: bus_dmamap_create header spare failed: %d\n",
3713 __func__, error);
3714 goto fail;
3716 error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
3717 &rxr->rx_pspare_map);
3718 if (error) {
3719 device_printf(dev,
3720 "%s: bus_dmamap_create packet spare failed: %d\n",
3721 __func__, error);
3722 goto fail;
3725 for (i = 0; i < adapter->num_rx_desc; i++) {
3726 rxbuf = &rxr->rx_buffers[i];
3727 error = bus_dmamap_create(rxr->rx_htag,
3728 BUS_DMA_NOWAIT, &rxbuf->head_map);
3729 if (error) {
3730 device_printf(dev,
3731 "Unable to create RX head DMA maps\n");
3732 goto fail;
3734 error = bus_dmamap_create(rxr->rx_ptag,
3735 BUS_DMA_NOWAIT, &rxbuf->pack_map);
3736 if (error) {
3737 device_printf(dev,
3738 "Unable to create RX packet DMA maps\n");
3739 goto fail;
3743 return (0);
3745 fail:
3746 /* Frees all, but can handle partial completion */
3747 igb_free_receive_structures(adapter);
3748 return (error);
3752 static void
3753 igb_free_receive_ring(struct rx_ring *rxr)
3755 struct adapter *adapter;
3756 struct igb_rx_buf *rxbuf;
3757 int i;
3759 adapter = rxr->adapter;
3760 for (i = 0; i < adapter->num_rx_desc; i++) {
3761 rxbuf = &rxr->rx_buffers[i];
3762 if (rxbuf->m_head != NULL) {
3763 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3764 BUS_DMASYNC_POSTREAD);
3765 bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3766 rxbuf->m_head->m_flags |= M_PKTHDR;
3767 m_freem(rxbuf->m_head);
3769 if (rxbuf->m_pack != NULL) {
3770 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3771 BUS_DMASYNC_POSTREAD);
3772 bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3773 rxbuf->m_pack->m_flags |= M_PKTHDR;
3774 m_freem(rxbuf->m_pack);
3776 rxbuf->m_head = NULL;
3777 rxbuf->m_pack = NULL;
3782 /*********************************************************************
3784 * Initialize a receive ring and its buffers.
3786 **********************************************************************/
3787 static int
3788 igb_setup_receive_ring(struct rx_ring *rxr)
3790 struct adapter *adapter;
3791 struct ifnet *ifp;
3792 device_t dev;
3793 #ifdef NET_LRO
3794 struct lro_ctrl *lro = &rxr->lro;
3795 #endif
3796 int j, rsize, error = 0;
3798 adapter = rxr->adapter;
3799 dev = adapter->dev;
3800 ifp = adapter->ifp;
3802 /* Clear the ring contents */
3803 IGB_RX_LOCK(rxr);
3804 rsize = roundup2(adapter->num_rx_desc *
3805 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3806 bzero((void *)rxr->rx_base, rsize);
3808 /*
3809 ** Free current RX buffer structures and their mbufs
3810 */
3811 igb_free_receive_ring(rxr);
3813 /* Now replenish the ring mbufs */
3814 for (j = 0; j < adapter->num_rx_desc; j++) {
3815 if ((error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH)) != 0)
3816 goto fail;
3819 /* Setup our descriptor indices */
3820 rxr->next_to_check = 0;
3821 rxr->last_cleaned = 0;
3822 rxr->lro_enabled = FALSE;
3824 if (igb_header_split)
3825 rxr->hdr_split = TRUE;
3826 #ifdef NET_LRO
3827 else
3828 ifp->if_capabilities &= ~IFCAP_LRO;
3829 #endif
3831 rxr->fmp = NULL;
3832 rxr->lmp = NULL;
3833 rxr->discard = FALSE;
3835 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3836 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3838 /*
3839 ** Now set up the LRO interface; we
3840 ** also only do header split when LRO
3841 ** is enabled, since without it header
3842 ** split is so often undesirable.
3843 */
3844 #ifdef NET_LRO
3845 if (ifp->if_capenable & IFCAP_LRO) {
3846 int err = tcp_lro_init(lro);
3847 if (err) {
3848 device_printf(dev, "LRO Initialization failed!\n");
3849 goto fail;
3851 INIT_DEBUGOUT("RX LRO Initialized\n");
3852 rxr->lro_enabled = TRUE;
3853 lro->ifp = adapter->ifp;
3855 #endif
3857 IGB_RX_UNLOCK(rxr);
3858 return (0);
3860 fail:
3861 igb_free_receive_ring(rxr);
3862 IGB_RX_UNLOCK(rxr);
3863 return (error);
3866 /*********************************************************************
3868 * Initialize all receive rings.
3870 **********************************************************************/
3871 static int
3872 igb_setup_receive_structures(struct adapter *adapter)
3874 struct rx_ring *rxr = adapter->rx_rings;
3875 int i;
3877 for (i = 0; i < adapter->num_queues; i++, rxr++)
3878 if (igb_setup_receive_ring(rxr))
3879 goto fail;
3881 return (0);
3882 fail:
3883 /*
3884 * Free the RX buffers allocated so far; we only need to handle
3885 * the rings that completed, since the failing ring will have
3886 * cleaned up for itself.  The value of 'i' is the index of the
3887 * failed ring, so we must pre-decrement it.
3888 */
3889 rxr = adapter->rx_rings;
3890 for (--i; i >= 0; i--, rxr++)
3891 igb_free_receive_ring(rxr);
3895 return (ENOBUFS);
3898 /*********************************************************************
3900 * Enable receive unit.
3902 **********************************************************************/
3903 static void
3904 igb_initialize_receive_units(struct adapter *adapter)
3906 struct rx_ring *rxr = adapter->rx_rings;
3907 struct ifnet *ifp = adapter->ifp;
3908 struct e1000_hw *hw = &adapter->hw;
3909 u32 rctl, rxcsum, psize, srrctl = 0;
3911 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3913 /*
3914 * Make sure receives are disabled while setting
3915 * up the descriptor ring
3916 */
3917 rctl = E1000_READ_REG(hw, E1000_RCTL);
3918 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3920 /*
3921 ** Set up for header split
3922 */
3923 if (rxr->hdr_split) {
3924 /* Use a standard mbuf for the header */
3925 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3926 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3927 } else
3928 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3930 /*
3931 ** Set up for jumbo frames
3932 */
3933 if (ifp->if_mtu > ETHERMTU) {
3934 rctl |= E1000_RCTL_LPE;
3935 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3936 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
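/*
** The SRRCTL BSIZEPKT field is in 1 KB units, so the value set
** above selects a 4 KB packet buffer to match E1000_RCTL_SZ_4096.
*/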
3938 /* Set maximum packet len */
3939 psize = adapter->max_frame_size;
3940 /* are we on a vlan? */
3941 if (adapter->ifp->if_vlantrunks != NULL)
3942 psize += VLAN_TAG_SIZE;
3943 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3944 } else {
3945 rctl &= ~E1000_RCTL_LPE;
3946 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3947 rctl |= E1000_RCTL_SZ_2048;
3950 /* Setup the Base and Length of the Rx Descriptor Rings */
3951 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3952 u64 bus_addr = rxr->rxdma.dma_paddr;
3953 u32 rxdctl;
3955 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3956 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3957 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3958 (uint32_t)(bus_addr >> 32));
3959 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3960 (uint32_t)bus_addr);
3961 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3962 /* Enable this Queue */
3963 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3964 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3965 rxdctl &= 0xFFF00000;
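/* Clear the PTHRESH/HTHRESH/WTHRESH threshold fields before setting them below */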
3966 rxdctl |= IGB_RX_PTHRESH;
3967 rxdctl |= IGB_RX_HTHRESH << 8;
3968 rxdctl |= IGB_RX_WTHRESH << 16;
3969 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3970 }
3972 /*
3973 ** Setup for RX MultiQueue
3974 */
3975 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3976 if (adapter->num_queues > 1) {
3977 u32 random[10], mrqc, shift = 0;
3978 union igb_reta {
3979 u32 dword;
3980 u8 bytes[4];
3981 } reta;
3983 karc4rand(&random, sizeof(random));
3984 if (adapter->hw.mac.type == e1000_82575)
3985 shift = 6;
3986 /* Warning FM follows */
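/*
** Populate the 128-entry RSS redirection table (RETA),
** packing four queue indices into each 32-bit register write.
*/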
3987 for (int i = 0; i < 128; i++) {
3988 reta.bytes[i & 3] =
3989 (i % adapter->num_queues) << shift;
3990 if ((i & 3) == 3)
3991 E1000_WRITE_REG(hw,
3992 E1000_RETA(i >> 2), reta.dword);
3994 /* Now fill in hash table */
3995 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
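/* Seed the 40-byte RSS hash key (10 x 32-bit RSSRK registers) with random data */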
3996 for (int i = 0; i < 10; i++)
3997 E1000_WRITE_REG_ARRAY(hw,
3998 E1000_RSSRK(0), i, random[i]);
4000 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4001 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4002 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4003 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4004 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4005 E1000_MRQC_RSS_FIELD_IPV6_UDP);
4006 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4007 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4009 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4011 /*
4012 ** NOTE: Receive Full-Packet Checksum Offload
4013 ** is mutually exclusive with Multiqueue. However,
4014 ** this is not the same as TCP/IP checksum offload,
4015 ** which still works.
4016 */
4017 rxcsum |= E1000_RXCSUM_PCSD;
4018 #if __FreeBSD_version >= 800000
4019 /* For SCTP Offload */
4020 if ((hw->mac.type == e1000_82576)
4021 && (ifp->if_capenable & IFCAP_RXCSUM))
4022 rxcsum |= E1000_RXCSUM_CRCOFL;
4023 #endif
4024 } else {
4025 /* Non RSS setup */
4026 if (ifp->if_capenable & IFCAP_RXCSUM) {
4027 rxcsum |= E1000_RXCSUM_IPPCSE;
4028 #if __FreeBSD_version >= 800000
4029 if (adapter->hw.mac.type == e1000_82576)
4030 rxcsum |= E1000_RXCSUM_CRCOFL;
4031 #endif
4032 } else
4033 rxcsum &= ~E1000_RXCSUM_TUOFL;
4035 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4037 /* Setup the Receive Control Register */
4038 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4039 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4040 E1000_RCTL_RDMTS_HALF |
4041 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
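/* The MO field selects which bits of the multicast address index the filter table */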
4042 /* Strip CRC bytes. */
4043 rctl |= E1000_RCTL_SECRC;
4044 /* Make sure VLAN Filters are off */
4045 rctl &= ~E1000_RCTL_VFE;
4046 /* Don't store bad packets */
4047 rctl &= ~E1000_RCTL_SBP;
4049 /* Enable Receives */
4050 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4052 /*
4053 * Setup the HW Rx Head and Tail Descriptor Pointers
4054 * - needs to be after enable
4055 */
4056 for (int i = 0; i < adapter->num_queues; i++) {
4057 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4058 E1000_WRITE_REG(hw, E1000_RDT(i),
4059 adapter->num_rx_desc - 1);
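/* Head at the start and tail at the last slot makes the ring available to the hardware */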
4061 return;
4064 /*********************************************************************
4066 * Free receive rings.
4068 **********************************************************************/
4069 static void
4070 igb_free_receive_structures(struct adapter *adapter)
4072 struct rx_ring *rxr = adapter->rx_rings;
4074 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4075 #ifdef NET_LRO
4076 struct lro_ctrl *lro = &rxr->lro;
4077 #endif
4078 igb_free_receive_buffers(rxr);
4079 #ifdef NET_LRO
4080 tcp_lro_free(lro);
4081 #endif
4082 igb_dma_free(adapter, &rxr->rxdma);
4085 kfree(adapter->rx_rings, M_DEVBUF);
4088 /*********************************************************************
4090 * Free receive ring data structures.
4092 **********************************************************************/
4093 static void
4094 igb_free_receive_buffers(struct rx_ring *rxr)
4096 struct adapter *adapter = rxr->adapter;
4097 struct igb_rx_buf *rxbuf;
4098 int i;
4100 INIT_DEBUGOUT("free_receive_buffers: begin");
4102 if (rxr->rx_hspare_map != NULL) {
4103 bus_dmamap_destroy(rxr->rx_htag, rxr->rx_hspare_map);
4104 rxr->rx_hspare_map = NULL;
4107 if (rxr->rx_pspare_map != NULL) {
4108 bus_dmamap_destroy(rxr->rx_ptag, rxr->rx_pspare_map);
4109 rxr->rx_pspare_map = NULL;
4112 /* Cleanup any existing buffers */
4113 if (rxr->rx_buffers != NULL) {
4114 for (i = 0; i < adapter->num_rx_desc; i++) {
4115 rxbuf = &rxr->rx_buffers[i];
4116 if (rxbuf->m_head != NULL) {
4117 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
4118 BUS_DMASYNC_POSTREAD);
4119 bus_dmamap_unload(rxr->rx_htag,
4120 rxbuf->head_map);
4121 rxbuf->m_head->m_flags |= M_PKTHDR;
4122 m_freem(rxbuf->m_head);
4124 if (rxbuf->m_pack != NULL) {
4125 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
4126 BUS_DMASYNC_POSTREAD);
4127 bus_dmamap_unload(rxr->rx_ptag,
4128 rxbuf->pack_map);
4129 rxbuf->m_pack->m_flags |= M_PKTHDR;
4130 m_freem(rxbuf->m_pack);
4132 rxbuf->m_head = NULL;
4133 rxbuf->m_pack = NULL;
4134 if (rxbuf->head_map != NULL) {
4135 bus_dmamap_destroy(rxr->rx_htag,
4136 rxbuf->head_map);
4137 rxbuf->head_map = NULL;
4139 if (rxbuf->pack_map != NULL) {
4140 bus_dmamap_destroy(rxr->rx_ptag,
4141 rxbuf->pack_map);
4142 rxbuf->pack_map = NULL;
4145 if (rxr->rx_buffers != NULL) {
4146 kfree(rxr->rx_buffers, M_DEVBUF);
4147 rxr->rx_buffers = NULL;
4151 if (rxr->rx_htag != NULL) {
4152 bus_dma_tag_destroy(rxr->rx_htag);
4153 rxr->rx_htag = NULL;
4155 if (rxr->rx_ptag != NULL) {
4156 bus_dma_tag_destroy(rxr->rx_ptag);
4157 rxr->rx_ptag = NULL;
4161 static __inline void
4162 igb_rx_discard(struct rx_ring *rxr, union e1000_adv_rx_desc *cur, int i)
4165 if (rxr->fmp != NULL) {
4166 rxr->fmp->m_flags |= M_PKTHDR;
4167 m_freem(rxr->fmp);
4168 rxr->fmp = NULL;
4169 rxr->lmp = NULL;
4173 static __inline void
4174 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4177 /*
4178 * At the moment, LRO is only for IPv4/TCP packets whose TCP
4179 * checksum has been verified by hardware, and the frame must
4180 * not carry a VLAN tag in the ethernet header.
4181 */
4182 #ifdef NET_LRO
4183 if (rxr->lro_enabled &&
4184 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4185 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4186 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4187 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4188 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4189 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4190 /*
4191 * Send to the stack if:
4192 **  - LRO not enabled, or
4193 **  - no LRO resources, or
4194 **  - lro enqueue fails
4195 */
4196 if (rxr->lro.lro_cnt != 0)
4197 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4198 return;
4200 #endif
4201 (*ifp->if_input)(ifp, m);
4204 /*********************************************************************
4206 * This routine executes in interrupt context. It replenishes
4207 * the mbufs in the descriptor and sends data which has been
4208 * dma'ed into host memory to upper layer.
4210 * We loop at most count times, exiting early once there are no
4211 * more completed descriptors to process.
4213 * Return TRUE if more to clean, FALSE otherwise
4214 *********************************************************************/
4215 static bool
4216 igb_rxeof(struct rx_ring *rxr, int count)
4218 struct adapter *adapter = rxr->adapter;
4219 struct ifnet *ifp = adapter->ifp;
4220 #ifdef NET_LRO
4221 struct lro_ctrl *lro = &rxr->lro;
4222 struct lro_entry *queued;
4223 #endif
4224 int i, prog = 0;
4225 u32 ptype, staterr = 0;
4226 union e1000_adv_rx_desc *cur;
4228 IGB_RX_LOCK(rxr);
4230 /* Main clean loop */
4231 for (i = rxr->next_to_check; count > 0; prog++) {
4232 struct mbuf *sendmp, *mh, *mp;
4233 u16 hlen, plen, hdr, vtag;
4234 bool eop = FALSE;
4235 u8 dopayload;
4237 /* Sync the ring. */
4238 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4239 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4240 cur = &rxr->rx_base[i];
4241 staterr = le32toh(cur->wb.upper.status_error);
4242 if ((staterr & E1000_RXD_STAT_DD) == 0)
4243 break;
4244 if ((ifp->if_flags & IFF_RUNNING) == 0)
4245 break;
4246 count--;
4247 sendmp = mh = mp = NULL;
4248 cur->wb.upper.status_error = 0;
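/* Clear the writeback status so a stale DD bit is not seen when the ring wraps */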
4249 plen = le16toh(cur->wb.upper.length);
4250 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4251 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4252 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
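/* EOP marks the final descriptor of a (possibly multi-descriptor) frame */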
4254 /* Make sure all segments of a bad packet are discarded */
4255 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4256 (rxr->discard)) {
4257 ifp->if_ierrors++;
4258 ++rxr->rx_discarded;
4259 if (!eop) /* Catch subsequent segs */
4260 rxr->discard = TRUE;
4261 else
4262 rxr->discard = FALSE;
4263 igb_rx_discard(rxr, cur, i);
4264 goto next_desc;
4265 }
4267 /*
4268 ** The way the hardware is configured to
4269 ** split, it will ONLY use the header buffer
4270 ** when header split is enabled, otherwise we
4271 ** get normal behavior, i.e., both header and
4272 ** payload are DMA'd into the payload buffer.
4273 **
4274 ** The fmp test is to catch the case where a
4275 ** packet spans multiple descriptors, in that
4276 ** case only the first header is valid.
4277 */
4278 if (rxr->hdr_split && rxr->fmp == NULL) {
4279 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4280 E1000_RXDADV_HDRBUFLEN_SHIFT;
4281 if (hlen > IGB_HDR_BUF)
4282 hlen = IGB_HDR_BUF;
4283 /* Handle the header mbuf */
4284 mh = rxr->rx_buffers[i].m_head;
4285 mh->m_len = hlen;
4286 dopayload = IGB_CLEAN_HEADER;
4287 /*
4288 ** Get the payload length; this
4289 ** could be zero if it's a small
4290 ** packet.
4291 */
4292 if (plen > 0) {
4293 mp = rxr->rx_buffers[i].m_pack;
4294 mp->m_len = plen;
4295 mh->m_next = mp;
4296 dopayload = IGB_CLEAN_BOTH;
4297 rxr->rx_split_packets++;
4299 } else {
4300 /*
4301 ** Either no header split, or a
4302 ** secondary piece of a fragmented
4303 ** split packet.
4304 */
4305 mh = rxr->rx_buffers[i].m_pack;
4306 mh->m_len = plen;
4307 dopayload = IGB_CLEAN_PAYLOAD;
4308 }
4310 /*
4311 ** get_buf will overwrite the writeback
4312 ** descriptor so save the VLAN tag now.
4313 */
4314 vtag = le16toh(cur->wb.upper.vlan);
4315 if (igb_get_buf(rxr, i, dopayload) != 0) {
4316 ifp->if_iqdrops++;
4317 /*
4318 * We've dropped a frame due to lack of resources
4319 * so we should drop entire multi-segmented
4320 * frames until we encounter EOP.
4321 */
4322 if ((staterr & E1000_RXD_STAT_EOP) != 0)
4323 rxr->discard = TRUE;
4324 igb_rx_discard(rxr, cur, i);
4325 goto next_desc;
4328 /* Initial frame - setup */
4329 if (rxr->fmp == NULL) {
4330 mh->m_pkthdr.len = mh->m_len;
4331 /* Store the first mbuf */
4332 rxr->fmp = mh;
4333 rxr->lmp = mh;
4334 if (mp != NULL) {
4335 /* Add payload if split */
4336 mh->m_pkthdr.len += mp->m_len;
4337 rxr->lmp = mh->m_next;
4339 } else {
4340 /* Chain mbuf's together */
4341 rxr->lmp->m_next = mh;
4342 rxr->lmp = rxr->lmp->m_next;
4343 rxr->fmp->m_pkthdr.len += mh->m_len;
4346 if (eop) {
4347 rxr->fmp->m_pkthdr.rcvif = ifp;
4348 ifp->if_ipackets++;
4349 rxr->rx_packets++;
4350 /* capture data for AIM */
4351 rxr->packets++;
4352 rxr->bytes += rxr->fmp->m_pkthdr.len;
4353 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4355 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4356 igb_rx_checksum(staterr, rxr->fmp, ptype);
4357 /* XXX igb(4) always strips VLAN. */
4358 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4359 (staterr & E1000_RXD_STAT_VP) != 0) {
4360 rxr->fmp->m_pkthdr.ether_vlantag = vtag;
4361 rxr->fmp->m_flags |= M_VLANTAG;
4363 #if __FreeBSD_version >= 800000
4364 rxr->fmp->m_pkthdr.flowid = curcpu;
4365 rxr->fmp->m_flags |= M_FLOWID;
4366 #endif
4367 sendmp = rxr->fmp;
4368 /* Make sure to set M_PKTHDR. */
4369 sendmp->m_flags |= M_PKTHDR;
4370 rxr->fmp = NULL;
4371 rxr->lmp = NULL;
4374 next_desc:
4375 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4376 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4378 rxr->last_cleaned = i; /* For updating tail */
4380 /* Advance our pointers to the next descriptor. */
4381 if (++i == adapter->num_rx_desc)
4382 i = 0;
4384 /*
4385 ** Note that we hold the RX lock through
4386 ** the following call, so this ring's
4387 ** next_to_check is not going to change.
4388 */
4389 if (sendmp != NULL)
4390 igb_rx_input(rxr, ifp, sendmp, ptype);
4393 if (prog == 0) {
4394 IGB_RX_UNLOCK(rxr);
4395 return (FALSE);
4398 rxr->next_to_check = i;
4400 /* Advance the E1000's Receive Queue "Tail Pointer". */
4401 E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4403 /*
4404 * Flush any outstanding LRO work
4405 */
4406 #ifdef NET_LRO
4407 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4408 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4409 tcp_lro_flush(lro, queued);
4411 #endif
4413 IGB_RX_UNLOCK(rxr);
4415 /*
4416 ** Do we still have cleaning to do?
4417 ** Schedule another interrupt if so.
4418 */
4419 if ((staterr & E1000_RXD_STAT_DD) != 0)
4420 return (TRUE);
4422 return (FALSE);
4425 /*********************************************************************
4427 * Verify that the hardware indicated that the checksum is valid.
4428 * Inform the stack about the status of the checksum so that the
4429 * stack doesn't spend time verifying it again.
4431 *********************************************************************/
4432 static void
4433 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4435 u16 status = (u16)staterr;
4436 u8 errors = (u8) (staterr >> 24);
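/* The status bits sit in the low word of staterr, the error bits in its top byte */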
4437 int sctp;
4439 /* Ignore Checksum bit is set */
4440 if (status & E1000_RXD_STAT_IXSM) {
4441 mp->m_pkthdr.csum_flags = 0;
4442 return;
4445 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4446 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4447 sctp = 1;
4448 else
4449 sctp = 0;
4450 if (status & E1000_RXD_STAT_IPCS) {
4451 /* Did it pass? */
4452 if (!(errors & E1000_RXD_ERR_IPE)) {
4453 /* IP Checksum Good */
4454 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4455 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4456 } else
4457 mp->m_pkthdr.csum_flags = 0;
4460 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4461 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4462 #if __FreeBSD_version >= 800000
4463 if (sctp) /* reassign */
4464 type = CSUM_SCTP_VALID;
4465 #endif
4466 /* Did it pass? */
4467 if (!(errors & E1000_RXD_ERR_TCPE)) {
4468 mp->m_pkthdr.csum_flags |= type;
4469 if (sctp == 0)
4470 mp->m_pkthdr.csum_data = htons(0xffff);
4473 return;
4476 /*
4477 * This routine is run via a vlan
4478 * config EVENT
4479 */
4480 static void
4481 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4483 struct adapter *adapter = ifp->if_softc;
4484 u32 index, bit;
4486 if (ifp->if_softc != arg) /* Not our event */
4487 return;
4489 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4490 return;
4492 index = (vtag >> 5) & 0x7F;
4493 bit = vtag & 0x1F;
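/* The VFTA is 128 32-bit words: VLAN id bits 11:5 select the word, bits 4:0 the bit */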
4494 igb_shadow_vfta[index] |= (1 << bit);
4495 ++adapter->num_vlans;
4496 /* Re-init to load the changes */
4497 igb_init(adapter);
4500 /*
4501 * This routine is run via a vlan
4502 * unconfig EVENT
4503 */
4504 static void
4505 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4507 struct adapter *adapter = ifp->if_softc;
4508 u32 index, bit;
4510 if (ifp->if_softc != arg)
4511 return;
4513 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
4514 return;
4516 index = (vtag >> 5) & 0x7F;
4517 bit = vtag & 0x1F;
4518 igb_shadow_vfta[index] &= ~(1 << bit);
4519 --adapter->num_vlans;
4520 /* Re-init to load the changes */
4521 igb_init(adapter);
4524 static void
4525 igb_setup_vlan_hw_support(struct adapter *adapter)
4527 struct e1000_hw *hw = &adapter->hw;
4528 u32 reg;
4530 /*
4531 ** We get here through init_locked, meaning
4532 ** a soft reset; this has already cleared
4533 ** the VFTA and other state, so if no
4534 ** VLANs have been registered, do nothing.
4535 */
4536 if (adapter->num_vlans == 0)
4537 return;
4539 /*
4540 ** A soft reset zeroes out the VFTA, so
4541 ** we need to repopulate it now.
4542 */
4543 for (int i = 0; i < IGB_VFTA_SIZE; i++)
4544 if (igb_shadow_vfta[i] != 0)
4545 E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4546 i, igb_shadow_vfta[i]);
4548 reg = E1000_READ_REG(hw, E1000_CTRL);
4549 reg |= E1000_CTRL_VME;
4550 E1000_WRITE_REG(hw, E1000_CTRL, reg);
4552 /* Enable the Filter Table */
4553 reg = E1000_READ_REG(hw, E1000_RCTL);
4554 reg &= ~E1000_RCTL_CFIEN;
4555 reg |= E1000_RCTL_VFE;
4556 E1000_WRITE_REG(hw, E1000_RCTL, reg);
4558 /* Update the frame size */
4559 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4560 adapter->max_frame_size + VLAN_TAG_SIZE);
4563 static void
4564 igb_enable_intr(struct adapter *adapter)
4566 /* With RSS set up, program what to auto clear */
4567 if (adapter->msix_mem) {
4568 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4569 adapter->eims_mask);
4570 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4571 adapter->eims_mask);
4572 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4573 adapter->eims_mask);
4574 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4575 E1000_IMS_LSC);
4576 } else {
4577 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4578 IMS_ENABLE_MASK);
4580 E1000_WRITE_FLUSH(&adapter->hw);
4582 return;
4585 static void
4586 igb_disable_intr(struct adapter *adapter)
4588 if (adapter->msix_mem) {
4589 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4590 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4592 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4593 E1000_WRITE_FLUSH(&adapter->hw);
4594 return;
4597 /*
4598 * Bit of a misnomer: what this really means is
4599 * to enable OS management of the system, i.e.
4600 * to disable special hardware management features.
4601 */
4602 static void
4603 igb_init_manageability(struct adapter *adapter)
4605 if (adapter->has_manage) {
4606 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4607 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4609 /* disable hardware interception of ARP */
4610 manc &= ~(E1000_MANC_ARP_EN);
4612 /* enable receiving management packets to the host */
4613 manc |= E1000_MANC_EN_MNG2HOST;
4614 manc2h |= 1 << 5; /* Mng Port 623 */
4615 manc2h |= 1 << 6; /* Mng Port 664 */
4616 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4617 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4621 /*
4622 * Give control back to hardware management
4623 * controller if there is one.
4624 */
4625 static void
4626 igb_release_manageability(struct adapter *adapter)
4628 if (adapter->has_manage) {
4629 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4631 /* re-enable hardware interception of ARP */
4632 manc |= E1000_MANC_ARP_EN;
4633 manc &= ~E1000_MANC_EN_MNG2HOST;
4635 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4639 /*
4640 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4641 * For ASF and Pass Through versions of f/w this means that
4642 * the driver is loaded.
4644 */
4645 static void
4646 igb_get_hw_control(struct adapter *adapter)
4648 u32 ctrl_ext;
4650 /* Let firmware know the driver has taken over */
4651 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4652 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4653 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4656 /*
4657 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4658 * For ASF and Pass Through versions of f/w this means that the
4659 * driver is no longer loaded.
4661 */
4662 static void
4663 igb_release_hw_control(struct adapter *adapter)
4665 u32 ctrl_ext;
4667 /* Let firmware take over control of h/w */
4668 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4669 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4670 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4673 static int
4674 igb_is_valid_ether_addr(uint8_t *addr)
4676 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
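/* Reject multicast/broadcast addresses (I/G bit set) and the all-zeros address */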
4678 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4679 return (FALSE);
4682 return (TRUE);
4686 /*
4687 * Enable PCI Wake On Lan capability
4688 */
4689 void
4690 igb_enable_wakeup(device_t dev)
4692 u16 cap, status;
4693 u8 id;
4695 /* First find the capabilities pointer */
4696 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4697 /* Read the PM Capabilities */
4698 id = pci_read_config(dev, cap, 1);
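/* Only the first capability in the list is examined; bail if it is not power management */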
4699 if (id != PCIY_PMG) /* Something wrong */
4700 return;
4701 /* OK, we have the power capabilities, so
4702 now get the status register */
4703 cap += PCIR_POWER_STATUS;
4704 status = pci_read_config(dev, cap, 2);
4705 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4706 pci_write_config(dev, cap, status, 2);
4707 return;
4711 /**********************************************************************
4713 * Update the board statistics counters.
4715 **********************************************************************/
4716 static void
4717 igb_update_stats_counters(struct adapter *adapter)
4719 struct ifnet *ifp;
4721 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4722 (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4723 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4724 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4726 adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4727 adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4728 adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4729 adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4731 adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4732 adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4733 adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4734 adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4735 adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4736 adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4737 adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4738 adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4739 adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4740 adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4741 adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4742 adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4743 adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4744 adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4745 adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4746 adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4747 adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4748 adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4749 adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4750 adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4752 /* For the 64-bit byte counters the low dword must be read first. */
4753 /* Both registers clear on the read of the high dword */
4755 adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4756 adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4758 adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4759 adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4760 adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4761 adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4762 adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4764 adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4765 adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4767 adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4768 adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4769 adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4770 adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4771 adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4772 adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4773 adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4774 adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4775 adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4776 adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4778 adapter->stats.algnerrc +=
4779 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4780 adapter->stats.rxerrc +=
4781 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4782 adapter->stats.tncrs +=
4783 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4784 adapter->stats.cexterr +=
4785 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4786 adapter->stats.tsctc +=
4787 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4788 adapter->stats.tsctfc +=
4789 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4790 ifp = adapter->ifp;
4792 ifp->if_collisions = adapter->stats.colc;
4794 /* Rx Errors */
4795 ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4796 adapter->stats.crcerrs + adapter->stats.algnerrc +
4797 adapter->stats.ruc + adapter->stats.roc +
4798 adapter->stats.mpc + adapter->stats.cexterr;
4800 /* Tx Errors */
4801 ifp->if_oerrors = adapter->stats.ecol +
4802 adapter->stats.latecol + adapter->watchdog_events;
4806 /**********************************************************************
4808 * This routine is called only when igb_display_debug_stats is enabled.
4809 * This routine provides a way to take a look at important statistics
4810 * maintained by the driver and hardware.
4812 **********************************************************************/
4813 static void
4814 igb_print_debug_info(struct adapter *adapter)
4816 device_t dev = adapter->dev;
4817 struct igb_queue *que = adapter->queues;
4818 struct rx_ring *rxr = adapter->rx_rings;
4819 struct tx_ring *txr = adapter->tx_rings;
4820 uint8_t *hw_addr = adapter->hw.hw_addr;
4822 device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4823 device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4824 E1000_READ_REG(&adapter->hw, E1000_CTRL),
4825 E1000_READ_REG(&adapter->hw, E1000_RCTL));
4827 #if (DEBUG_HW > 0) /* Don't output these errors normally */
4828 device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4829 E1000_READ_REG(&adapter->hw, E1000_IMS),
4830 E1000_READ_REG(&adapter->hw, E1000_EIMS));
4831 #endif
4833 device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4834 ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4835 (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4836 device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4837 adapter->hw.fc.high_water,
4838 adapter->hw.fc.low_water);
4840 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4841 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d ", i,
4842 E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4843 E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4844 device_printf(dev, "rdh = %d, rdt = %d\n",
4845 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4846 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4847 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4848 txr->me, (long long)txr->no_desc_avail);
4849 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4850 txr->me, (long long)txr->tx_packets);
4851 device_printf(dev, "RX(%d) Packets received = %lld ",
4852 rxr->me, (long long)rxr->rx_packets);
4855 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4856 #ifdef NET_LRO
4857 struct lro_ctrl *lro = &rxr->lro;
4858 #endif
4859 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4860 E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4861 E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4862 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4863 (long long)rxr->rx_packets);
4864 device_printf(dev, " Split Packets = %lld ",
4865 (long long)rxr->rx_split_packets);
4866 device_printf(dev, " Byte count = %lld\n",
4867 (long long)rxr->rx_bytes);
4868 #ifdef NET_LRO
4869 device_printf(dev,"RX(%d) LRO Queued= %d ",
4870 i, lro->lro_queued);
4871 device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4872 #endif
4875 for (int i = 0; i < adapter->num_queues; i++, que++)
4876 device_printf(dev,"QUE(%d) IRQs = %llx\n",
4877 i, (long long)que->irqs);
4879 device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4880 device_printf(dev, "Mbuf defrag failed = %ld\n",
4881 adapter->mbuf_defrag_failed);
4882 device_printf(dev, "Std mbuf header failed = %ld\n",
4883 adapter->mbuf_header_failed);
4884 device_printf(dev, "Std mbuf packet failed = %ld\n",
4885 adapter->mbuf_packet_failed);
4886 device_printf(dev, "Driver dropped packets = %ld\n",
4887 adapter->dropped_pkts);
4888 device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4889 adapter->no_tx_dma_setup);
4892 static void
4893 igb_print_hw_stats(struct adapter *adapter)
4895 device_t dev = adapter->dev;
4897 device_printf(dev, "Excessive collisions = %lld\n",
4898 (long long)adapter->stats.ecol);
4899 #if (DEBUG_HW > 0) /* Don't output these errors normally */
4900 device_printf(dev, "Symbol errors = %lld\n",
4901 (long long)adapter->stats.symerrs);
4902 #endif
4903 device_printf(dev, "Sequence errors = %lld\n",
4904 (long long)adapter->stats.sec);
4905 device_printf(dev, "Defer count = %lld\n",
4906 (long long)adapter->stats.dc);
4907 device_printf(dev, "Missed Packets = %lld\n",
4908 (long long)adapter->stats.mpc);
4909 device_printf(dev, "Receive No Buffers = %lld\n",
4910 (long long)adapter->stats.rnbc);
4911 /* RLEC is inaccurate on some hardware, calculate our own. */
4912 device_printf(dev, "Receive Length Errors = %lld\n",
4913 ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4914 device_printf(dev, "Receive errors = %lld\n",
4915 (long long)adapter->stats.rxerrc);
4916 device_printf(dev, "Crc errors = %lld\n",
4917 (long long)adapter->stats.crcerrs);
4918 device_printf(dev, "Alignment errors = %lld\n",
4919 (long long)adapter->stats.algnerrc);
4920 /* On 82575 these are collision counts */
4921 device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4922 (long long)adapter->stats.cexterr);
4923 device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4924 device_printf(dev, "watchdog timeouts = %ld\n",
4925 adapter->watchdog_events);
4926 device_printf(dev, "XON Rcvd = %lld\n",
4927 (long long)adapter->stats.xonrxc);
4928 device_printf(dev, "XON Xmtd = %lld\n",
4929 (long long)adapter->stats.xontxc);
4930 device_printf(dev, "XOFF Rcvd = %lld\n",
4931 (long long)adapter->stats.xoffrxc);
4932 device_printf(dev, "XOFF Xmtd = %lld\n",
4933 (long long)adapter->stats.xofftxc);
4934 device_printf(dev, "Good Packets Rcvd = %lld\n",
4935 (long long)adapter->stats.gprc);
4936 device_printf(dev, "Good Packets Xmtd = %lld\n",
4937 (long long)adapter->stats.gptc);
4938 device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4939 (long long)adapter->stats.tsctc);
4940 device_printf(dev, "TSO Contexts Failed = %lld\n",
4941 (long long)adapter->stats.tsctfc);
4944 /**********************************************************************
4946 * This routine provides a way to dump out the adapter eeprom,
4947 * often a useful debug/service tool. This only dumps the first
4948 * 32 words; the stuff that matters is within that extent.
4950 **********************************************************************/
4951 static void
4952 igb_print_nvm_info(struct adapter *adapter)
4954 u16 eeprom_data;
4955 int i, j, row = 0;
4957 /* It's a bit crude, but it gets the job done */
4958 kprintf("\nInterface EEPROM Dump:\n");
4959 kprintf("Offset\n0x0000 ");
4960 for (i = 0, j = 0; i < 32; i++, j++) {
4961 if (j == 8) { /* Make the offset block */
4962 j = 0; ++row;
4963 kprintf("\n0x00%x0 ",row);
4965 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4966 kprintf("%04x ", eeprom_data);
4968 kprintf("\n");
4971 static int
4972 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4974 struct adapter *adapter;
4975 int error;
4976 int result;
4978 result = -1;
4979 error = sysctl_handle_int(oidp, &result, 0, req);
4981 if (error || !req->newptr)
4982 return (error);
4984 if (result == 1) {
4985 adapter = (struct adapter *)arg1;
4986 igb_print_debug_info(adapter);
4987 }
4988 /*
4989 * This value will cause a hex dump of the
4990 * first 32 16-bit words of the EEPROM to
4991 * the screen.
4992 */
4993 if (result == 2) {
4994 adapter = (struct adapter *)arg1;
4995 igb_print_nvm_info(adapter);
4998 return (error);
5002 static int
5003 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
5005 struct adapter *adapter;
5006 int error;
5007 int result;
5009 result = -1;
5010 error = sysctl_handle_int(oidp, &result, 0, req);
5012 if (error || !req->newptr)
5013 return (error);
5015 if (result == 1) {
5016 adapter = (struct adapter *)arg1;
5017 igb_print_hw_stats(adapter);
5020 return (error);
5023 static void
5024 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5025 const char *description, int *limit, int value)
5027 *limit = value;
5028 SYSCTL_ADD_INT(&adapter->sysctl_ctx,
5029 SYSCTL_CHILDREN(adapter->sysctl_tree),
5030 OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);