igb: add basic runtime PM support
drivers/net/ethernet/intel/igb/igb_main.c
1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
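/* With MAJ = 3, MIN = 2 and BUILD = 10 above, DRV_VERSION expands to the
 * string "3.2.10-k". */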
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
75 };
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 /* required last entry */
104 {0, }
105 };
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
175 #ifdef CONFIG_PM
176 static int igb_suspend(struct device *);
177 static int igb_resume(struct device *);
178 #ifdef CONFIG_PM_RUNTIME
179 static int igb_runtime_suspend(struct device *dev);
180 static int igb_runtime_resume(struct device *dev);
181 static int igb_runtime_idle(struct device *dev);
182 #endif
183 static const struct dev_pm_ops igb_pm_ops = {
184 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
185 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
186 igb_runtime_idle)
187 };
188 #endif
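/*
 * These dev_pm_ops are attached to the PCI driver through .driver.pm
 * further down. As an illustrative sketch only (assumed usage, not code
 * from this file), the runtime callbacks are normally driven by
 * reference counting around hardware access with the helpers from
 * <linux/pm_runtime.h>, which is already included above:
 *
 *	pm_runtime_get_sync(&pdev->dev);	resume if runtime-suspended
 *	...access device registers...
 *	pm_runtime_put(&pdev->dev);		drop reference, allow suspend
 */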
189 static void igb_shutdown(struct pci_dev *);
190 #ifdef CONFIG_IGB_DCA
191 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
192 static struct notifier_block dca_notifier = {
193 .notifier_call = igb_notify_dca,
194 .next = NULL,
195 .priority = 0
196 };
197 #endif
198 #ifdef CONFIG_NET_POLL_CONTROLLER
199 /* for netdump / net console */
200 static void igb_netpoll(struct net_device *);
201 #endif
202 #ifdef CONFIG_PCI_IOV
203 static unsigned int max_vfs = 0;
204 module_param(max_vfs, uint, 0);
205 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
206 "per physical function");
207 #endif /* CONFIG_PCI_IOV */
209 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
210 pci_channel_state_t);
211 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
212 static void igb_io_resume(struct pci_dev *);
214 static struct pci_error_handlers igb_err_handler = {
215 .error_detected = igb_io_error_detected,
216 .slot_reset = igb_io_slot_reset,
217 .resume = igb_io_resume,
218 };
220 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
222 static struct pci_driver igb_driver = {
223 .name = igb_driver_name,
224 .id_table = igb_pci_tbl,
225 .probe = igb_probe,
226 .remove = __devexit_p(igb_remove),
227 #ifdef CONFIG_PM
228 .driver.pm = &igb_pm_ops,
229 #endif
230 .shutdown = igb_shutdown,
231 .err_handler = &igb_err_handler
232 };
234 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
235 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
236 MODULE_LICENSE("GPL");
237 MODULE_VERSION(DRV_VERSION);
239 struct igb_reg_info {
240 u32 ofs;
241 char *name;
242 };
244 static const struct igb_reg_info igb_reg_info_tbl[] = {
246 /* General Registers */
247 {E1000_CTRL, "CTRL"},
248 {E1000_STATUS, "STATUS"},
249 {E1000_CTRL_EXT, "CTRL_EXT"},
251 /* Interrupt Registers */
252 {E1000_ICR, "ICR"},
254 /* RX Registers */
255 {E1000_RCTL, "RCTL"},
256 {E1000_RDLEN(0), "RDLEN"},
257 {E1000_RDH(0), "RDH"},
258 {E1000_RDT(0), "RDT"},
259 {E1000_RXDCTL(0), "RXDCTL"},
260 {E1000_RDBAL(0), "RDBAL"},
261 {E1000_RDBAH(0), "RDBAH"},
263 /* TX Registers */
264 {E1000_TCTL, "TCTL"},
265 {E1000_TDBAL(0), "TDBAL"},
266 {E1000_TDBAH(0), "TDBAH"},
267 {E1000_TDLEN(0), "TDLEN"},
268 {E1000_TDH(0), "TDH"},
269 {E1000_TDT(0), "TDT"},
270 {E1000_TXDCTL(0), "TXDCTL"},
271 {E1000_TDFH, "TDFH"},
272 {E1000_TDFT, "TDFT"},
273 {E1000_TDFHS, "TDFHS"},
274 {E1000_TDFPC, "TDFPC"},
276 /* List Terminator */
277 {}
278 };
281 * igb_regdump - register printout routine
283 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
285 int n = 0;
286 char rname[16];
287 u32 regs[8];
289 switch (reginfo->ofs) {
290 case E1000_RDLEN(0):
291 for (n = 0; n < 4; n++)
292 regs[n] = rd32(E1000_RDLEN(n));
293 break;
294 case E1000_RDH(0):
295 for (n = 0; n < 4; n++)
296 regs[n] = rd32(E1000_RDH(n));
297 break;
298 case E1000_RDT(0):
299 for (n = 0; n < 4; n++)
300 regs[n] = rd32(E1000_RDT(n));
301 break;
302 case E1000_RXDCTL(0):
303 for (n = 0; n < 4; n++)
304 regs[n] = rd32(E1000_RXDCTL(n));
305 break;
306 case E1000_RDBAL(0):
307 for (n = 0; n < 4; n++)
308 regs[n] = rd32(E1000_RDBAL(n));
309 break;
310 case E1000_RDBAH(0):
311 for (n = 0; n < 4; n++)
312 regs[n] = rd32(E1000_RDBAH(n));
313 break;
314 case E1000_TDBAL(0):
315 for (n = 0; n < 4; n++)
316 regs[n] = rd32(E1000_TDBAL(n));
317 break;
318 case E1000_TDBAH(0):
319 for (n = 0; n < 4; n++)
320 regs[n] = rd32(E1000_TDBAH(n));
321 break;
322 case E1000_TDLEN(0):
323 for (n = 0; n < 4; n++)
324 regs[n] = rd32(E1000_TDLEN(n));
325 break;
326 case E1000_TDH(0):
327 for (n = 0; n < 4; n++)
328 regs[n] = rd32(E1000_TDH(n));
329 break;
330 case E1000_TDT(0):
331 for (n = 0; n < 4; n++)
332 regs[n] = rd32(E1000_TDT(n));
333 break;
334 case E1000_TXDCTL(0):
335 for (n = 0; n < 4; n++)
336 regs[n] = rd32(E1000_TXDCTL(n));
337 break;
338 default:
339 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
340 return;
343 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
344 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
345 regs[2], regs[3]);
349 * igb_dump - Print registers, tx-rings and rx-rings
351 static void igb_dump(struct igb_adapter *adapter)
353 struct net_device *netdev = adapter->netdev;
354 struct e1000_hw *hw = &adapter->hw;
355 struct igb_reg_info *reginfo;
356 struct igb_ring *tx_ring;
357 union e1000_adv_tx_desc *tx_desc;
358 struct my_u0 { u64 a; u64 b; } *u0;
359 struct igb_ring *rx_ring;
360 union e1000_adv_rx_desc *rx_desc;
361 u32 staterr;
362 u16 i, n;
364 if (!netif_msg_hw(adapter))
365 return;
367 /* Print netdevice Info */
368 if (netdev) {
369 dev_info(&adapter->pdev->dev, "Net device Info\n");
370 pr_info("Device Name state trans_start "
371 "last_rx\n");
372 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
373 netdev->state, netdev->trans_start, netdev->last_rx);
376 /* Print Registers */
377 dev_info(&adapter->pdev->dev, "Register Dump\n");
378 pr_info(" Register Name Value\n");
379 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
380 reginfo->name; reginfo++) {
381 igb_regdump(hw, reginfo);
384 /* Print TX Ring Summary */
385 if (!netdev || !netif_running(netdev))
386 goto exit;
388 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
389 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
390 for (n = 0; n < adapter->num_tx_queues; n++) {
391 struct igb_tx_buffer *buffer_info;
392 tx_ring = adapter->tx_ring[n];
393 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
394 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
395 n, tx_ring->next_to_use, tx_ring->next_to_clean,
396 (u64)buffer_info->dma,
397 buffer_info->length,
398 buffer_info->next_to_watch,
399 (u64)buffer_info->time_stamp);
402 /* Print TX Rings */
403 if (!netif_msg_tx_done(adapter))
404 goto rx_ring_summary;
406 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
408 /* Transmit Descriptor Formats
410 * Advanced Transmit Descriptor
411 * +--------------------------------------------------------------+
412 * 0 | Buffer Address [63:0] |
413 * +--------------------------------------------------------------+
414 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
415 * +--------------------------------------------------------------+
416 * 63 46 45 40 39 38 36 35 32 31 24 15 0
419 for (n = 0; n < adapter->num_tx_queues; n++) {
420 tx_ring = adapter->tx_ring[n];
421 pr_info("------------------------------------\n");
422 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
423 pr_info("------------------------------------\n");
424 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
425 "[bi->dma ] leng ntw timestamp "
426 "bi->skb\n");
428 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
429 const char *next_desc;
430 struct igb_tx_buffer *buffer_info;
431 tx_desc = IGB_TX_DESC(tx_ring, i);
432 buffer_info = &tx_ring->tx_buffer_info[i];
433 u0 = (struct my_u0 *)tx_desc;
434 if (i == tx_ring->next_to_use &&
435 i == tx_ring->next_to_clean)
436 next_desc = " NTC/U";
437 else if (i == tx_ring->next_to_use)
438 next_desc = " NTU";
439 else if (i == tx_ring->next_to_clean)
440 next_desc = " NTC";
441 else
442 next_desc = "";
444 pr_info("T [0x%03X] %016llX %016llX %016llX"
445 " %04X %p %016llX %p%s\n", i,
446 le64_to_cpu(u0->a),
447 le64_to_cpu(u0->b),
448 (u64)buffer_info->dma,
449 buffer_info->length,
450 buffer_info->next_to_watch,
451 (u64)buffer_info->time_stamp,
452 buffer_info->skb, next_desc);
454 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
455 print_hex_dump(KERN_INFO, "",
456 DUMP_PREFIX_ADDRESS,
457 16, 1, phys_to_virt(buffer_info->dma),
458 buffer_info->length, true);
462 /* Print RX Rings Summary */
463 rx_ring_summary:
464 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
465 pr_info("Queue [NTU] [NTC]\n");
466 for (n = 0; n < adapter->num_rx_queues; n++) {
467 rx_ring = adapter->rx_ring[n];
468 pr_info(" %5d %5X %5X\n",
469 n, rx_ring->next_to_use, rx_ring->next_to_clean);
472 /* Print RX Rings */
473 if (!netif_msg_rx_status(adapter))
474 goto exit;
476 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
478 /* Advanced Receive Descriptor (Read) Format
479 * 63 1 0
480 * +-----------------------------------------------------+
481 * 0 | Packet Buffer Address [63:1] |A0/NSE|
482 * +----------------------------------------------+------+
483 * 8 | Header Buffer Address [63:1] | DD |
484 * +-----------------------------------------------------+
487 * Advanced Receive Descriptor (Write-Back) Format
489 * 63 48 47 32 31 30 21 20 17 16 4 3 0
490 * +------------------------------------------------------+
491 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
492 * | Checksum Ident | | | | Type | Type |
493 * +------------------------------------------------------+
494 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
495 * +------------------------------------------------------+
496 * 63 48 47 32 31 20 19 0
499 for (n = 0; n < adapter->num_rx_queues; n++) {
500 rx_ring = adapter->rx_ring[n];
501 pr_info("------------------------------------\n");
502 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
503 pr_info("------------------------------------\n");
504 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
505 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
506 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
507 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
509 for (i = 0; i < rx_ring->count; i++) {
510 const char *next_desc;
511 struct igb_rx_buffer *buffer_info;
512 buffer_info = &rx_ring->rx_buffer_info[i];
513 rx_desc = IGB_RX_DESC(rx_ring, i);
514 u0 = (struct my_u0 *)rx_desc;
515 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
517 if (i == rx_ring->next_to_use)
518 next_desc = " NTU";
519 else if (i == rx_ring->next_to_clean)
520 next_desc = " NTC";
521 else
522 next_desc = "";
524 if (staterr & E1000_RXD_STAT_DD) {
525 /* Descriptor Done */
526 pr_info("%s[0x%03X] %016llX %016llX -------"
527 "--------- %p%s\n", "RWB", i,
528 le64_to_cpu(u0->a),
529 le64_to_cpu(u0->b),
530 buffer_info->skb, next_desc);
531 } else {
532 pr_info("%s[0x%03X] %016llX %016llX %016llX"
533 " %p%s\n", "R ", i,
534 le64_to_cpu(u0->a),
535 le64_to_cpu(u0->b),
536 (u64)buffer_info->dma,
537 buffer_info->skb, next_desc);
539 if (netif_msg_pktdata(adapter)) {
540 print_hex_dump(KERN_INFO, "",
541 DUMP_PREFIX_ADDRESS,
542 16, 1,
543 phys_to_virt(buffer_info->dma),
544 IGB_RX_HDR_LEN, true);
545 print_hex_dump(KERN_INFO, "",
546 DUMP_PREFIX_ADDRESS,
547 16, 1,
548 phys_to_virt(
549 buffer_info->page_dma +
550 buffer_info->page_offset),
551 PAGE_SIZE/2, true);
557 exit:
558 return;
563 * igb_read_clock - read raw cycle counter (to be used by time counter)
565 static cycle_t igb_read_clock(const struct cyclecounter *tc)
567 struct igb_adapter *adapter =
568 container_of(tc, struct igb_adapter, cycles);
569 struct e1000_hw *hw = &adapter->hw;
570 u64 stamp = 0;
571 int shift = 0;
574 * The timestamp latches on lowest register read. For the 82580
575 * the lowest register is SYSTIMR instead of SYSTIML. However, we never
576 * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
578 if (hw->mac.type >= e1000_82580) {
579 stamp = rd32(E1000_SYSTIMR) >> 8;
580 shift = IGB_82580_TSYNC_SHIFT;
583 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
584 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
585 return stamp;
589 * igb_get_hw_dev - return device
590 * used by hardware layer to print debugging information
592 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
594 struct igb_adapter *adapter = hw->back;
595 return adapter->netdev;
599 * igb_init_module - Driver Registration Routine
601 * igb_init_module is the first routine called when the driver is
602 * loaded. All it does is register with the PCI subsystem.
604 static int __init igb_init_module(void)
606 int ret;
607 pr_info("%s - version %s\n",
608 igb_driver_string, igb_driver_version);
610 pr_info("%s\n", igb_copyright);
612 #ifdef CONFIG_IGB_DCA
613 dca_register_notify(&dca_notifier);
614 #endif
615 ret = pci_register_driver(&igb_driver);
616 return ret;
619 module_init(igb_init_module);
622 * igb_exit_module - Driver Exit Cleanup Routine
624 * igb_exit_module is called just before the driver is removed
625 * from memory.
627 static void __exit igb_exit_module(void)
629 #ifdef CONFIG_IGB_DCA
630 dca_unregister_notify(&dca_notifier);
631 #endif
632 pci_unregister_driver(&igb_driver);
635 module_exit(igb_exit_module);
637 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
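/*
 * Q_IDX_82576() interleaves a sequential queue index across the two
 * halves of the 82576 queue space: 0->0, 1->8, 2->1, 3->9, 4->2, ...
 * so that VF n ends up with queues n and n + 8, as described in
 * igb_cache_ring_register() below.
 */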
639 * igb_cache_ring_register - Descriptor ring to register mapping
640 * @adapter: board private structure to initialize
642 * Once we know the feature-set enabled for the device, we'll cache
643 * the register offset the descriptor ring is assigned to.
645 static void igb_cache_ring_register(struct igb_adapter *adapter)
647 int i = 0, j = 0;
648 u32 rbase_offset = adapter->vfs_allocated_count;
650 switch (adapter->hw.mac.type) {
651 case e1000_82576:
652 /* The queues are allocated for virtualization such that VF 0
653 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
654 * In order to avoid collision we start at the first free queue
655 * and continue consuming queues in the same sequence
657 if (adapter->vfs_allocated_count) {
658 for (; i < adapter->rss_queues; i++)
659 adapter->rx_ring[i]->reg_idx = rbase_offset +
660 Q_IDX_82576(i);
662 case e1000_82575:
663 case e1000_82580:
664 case e1000_i350:
665 default:
666 for (; i < adapter->num_rx_queues; i++)
667 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
668 for (; j < adapter->num_tx_queues; j++)
669 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
670 break;
674 static void igb_free_queues(struct igb_adapter *adapter)
676 int i;
678 for (i = 0; i < adapter->num_tx_queues; i++) {
679 kfree(adapter->tx_ring[i]);
680 adapter->tx_ring[i] = NULL;
682 for (i = 0; i < adapter->num_rx_queues; i++) {
683 kfree(adapter->rx_ring[i]);
684 adapter->rx_ring[i] = NULL;
686 adapter->num_rx_queues = 0;
687 adapter->num_tx_queues = 0;
691 * igb_alloc_queues - Allocate memory for all rings
692 * @adapter: board private structure to initialize
694 * We allocate one ring per queue at run-time since we don't know the
695 * number of queues at compile-time.
697 static int igb_alloc_queues(struct igb_adapter *adapter)
699 struct igb_ring *ring;
700 int i;
701 int orig_node = adapter->node;
703 for (i = 0; i < adapter->num_tx_queues; i++) {
704 if (orig_node == -1) {
705 int cur_node = next_online_node(adapter->node);
706 if (cur_node == MAX_NUMNODES)
707 cur_node = first_online_node;
708 adapter->node = cur_node;
710 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
711 adapter->node);
712 if (!ring)
713 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
714 if (!ring)
715 goto err;
716 ring->count = adapter->tx_ring_count;
717 ring->queue_index = i;
718 ring->dev = &adapter->pdev->dev;
719 ring->netdev = adapter->netdev;
720 ring->numa_node = adapter->node;
721 /* For 82575, context index must be unique per ring. */
722 if (adapter->hw.mac.type == e1000_82575)
723 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
724 adapter->tx_ring[i] = ring;
726 /* Restore the adapter's original node */
727 adapter->node = orig_node;
729 for (i = 0; i < adapter->num_rx_queues; i++) {
730 if (orig_node == -1) {
731 int cur_node = next_online_node(adapter->node);
732 if (cur_node == MAX_NUMNODES)
733 cur_node = first_online_node;
734 adapter->node = cur_node;
736 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
737 adapter->node);
738 if (!ring)
739 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
740 if (!ring)
741 goto err;
742 ring->count = adapter->rx_ring_count;
743 ring->queue_index = i;
744 ring->dev = &adapter->pdev->dev;
745 ring->netdev = adapter->netdev;
746 ring->numa_node = adapter->node;
747 /* set flag indicating ring supports SCTP checksum offload */
748 if (adapter->hw.mac.type >= e1000_82576)
749 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
751 /* On i350, loopback VLAN packets have the tag byte-swapped. */
752 if (adapter->hw.mac.type == e1000_i350)
753 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
755 adapter->rx_ring[i] = ring;
757 /* Restore the adapter's original node */
758 adapter->node = orig_node;
760 igb_cache_ring_register(adapter);
762 return 0;
764 err:
765 /* Restore the adapter's original node */
766 adapter->node = orig_node;
767 igb_free_queues(adapter);
769 return -ENOMEM;
773 * igb_write_ivar - configure ivar for given MSI-X vector
774 * @hw: pointer to the HW structure
775 * @msix_vector: vector number we are allocating to a given ring
776 * @index: row index of IVAR register to write within IVAR table
777 * @offset: column offset in IVAR, should be a multiple of 8
779 * This function is intended to handle the writing of the IVAR register
780 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
781 * each containing a cause allocation for an Rx and Tx ring, and a
782 * variable number of rows depending on the number of queues supported.
784 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
785 int index, int offset)
787 u32 ivar = array_rd32(E1000_IVAR0, index);
789 /* clear any bits that are currently set */
790 ivar &= ~((u32)0xFF << offset);
792 /* write vector and valid bit */
793 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
795 array_wr32(E1000_IVAR0, index, ivar);
798 #define IGB_N0_QUEUE -1
799 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
801 struct igb_adapter *adapter = q_vector->adapter;
802 struct e1000_hw *hw = &adapter->hw;
803 int rx_queue = IGB_N0_QUEUE;
804 int tx_queue = IGB_N0_QUEUE;
805 u32 msixbm = 0;
807 if (q_vector->rx.ring)
808 rx_queue = q_vector->rx.ring->reg_idx;
809 if (q_vector->tx.ring)
810 tx_queue = q_vector->tx.ring->reg_idx;
812 switch (hw->mac.type) {
813 case e1000_82575:
814 /* The 82575 assigns vectors using a bitmask, which matches the
815 bitmask for the EICR/EIMS/EIMC registers. To assign one
816 or more queues to a vector, we write the appropriate bits
817 into the MSIXBM register for that vector. */
818 if (rx_queue > IGB_N0_QUEUE)
819 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
820 if (tx_queue > IGB_N0_QUEUE)
821 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
822 if (!adapter->msix_entries && msix_vector == 0)
823 msixbm |= E1000_EIMS_OTHER;
824 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
825 q_vector->eims_value = msixbm;
826 break;
827 case e1000_82576:
829 * 82576 uses a table that essentially consists of 2 columns
830 * with 8 rows. The ordering is column-major so we use the
831 * lower 3 bits as the row index, and the 4th bit as the
832 * column offset.
834 if (rx_queue > IGB_N0_QUEUE)
835 igb_write_ivar(hw, msix_vector,
836 rx_queue & 0x7,
837 (rx_queue & 0x8) << 1);
838 if (tx_queue > IGB_N0_QUEUE)
839 igb_write_ivar(hw, msix_vector,
840 tx_queue & 0x7,
841 ((tx_queue & 0x8) << 1) + 8);
842 q_vector->eims_value = 1 << msix_vector;
843 break;
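/*
 * Worked example of the 82576 column-major layout above: rx_queue 10
 * maps to IVAR register index 10 & 0x7 = 2 at byte offset
 * (10 & 0x8) << 1 = 16 (bits 23:16), while tx_queue 3 maps to index 3
 * at byte offset ((3 & 0x8) << 1) + 8 = 8 (bits 15:8).
 */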
844 case e1000_82580:
845 case e1000_i350:
847 * On 82580 and newer adapters the scheme is similar to 82576
848 * however instead of ordering column-major we have things
849 * ordered row-major. So we traverse the table by using
850 * bit 0 as the column offset, and the remaining bits as the
851 * row index.
853 if (rx_queue > IGB_N0_QUEUE)
854 igb_write_ivar(hw, msix_vector,
855 rx_queue >> 1,
856 (rx_queue & 0x1) << 4);
857 if (tx_queue > IGB_N0_QUEUE)
858 igb_write_ivar(hw, msix_vector,
859 tx_queue >> 1,
860 ((tx_queue & 0x1) << 4) + 8);
861 q_vector->eims_value = 1 << msix_vector;
862 break;
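/*
 * Worked example of the 82580/i350 row-major layout above: rx_queue 5
 * maps to IVAR register index 5 >> 1 = 2 at byte offset
 * (5 & 0x1) << 4 = 16, and tx_queue 5 maps to the same index at byte
 * offset 16 + 8 = 24.
 */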
863 default:
864 BUG();
865 break;
868 /* add q_vector eims value to global eims_enable_mask */
869 adapter->eims_enable_mask |= q_vector->eims_value;
871 /* configure q_vector to set itr on first interrupt */
872 q_vector->set_itr = 1;
876 * igb_configure_msix - Configure MSI-X hardware
878 * igb_configure_msix sets up the hardware to properly
879 * generate MSI-X interrupts.
881 static void igb_configure_msix(struct igb_adapter *adapter)
883 u32 tmp;
884 int i, vector = 0;
885 struct e1000_hw *hw = &adapter->hw;
887 adapter->eims_enable_mask = 0;
889 /* set vector for other causes, i.e. link changes */
890 switch (hw->mac.type) {
891 case e1000_82575:
892 tmp = rd32(E1000_CTRL_EXT);
893 /* enable MSI-X PBA support*/
894 tmp |= E1000_CTRL_EXT_PBA_CLR;
896 /* Auto-Mask interrupts upon ICR read. */
897 tmp |= E1000_CTRL_EXT_EIAME;
898 tmp |= E1000_CTRL_EXT_IRCA;
900 wr32(E1000_CTRL_EXT, tmp);
902 /* enable msix_other interrupt */
903 array_wr32(E1000_MSIXBM(0), vector++,
904 E1000_EIMS_OTHER);
905 adapter->eims_other = E1000_EIMS_OTHER;
907 break;
909 case e1000_82576:
910 case e1000_82580:
911 case e1000_i350:
912 /* Turn on MSI-X capability first, or our settings
913 * won't stick. And it will take days to debug. */
914 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
915 E1000_GPIE_PBA | E1000_GPIE_EIAME |
916 E1000_GPIE_NSICR);
918 /* enable msix_other interrupt */
919 adapter->eims_other = 1 << vector;
920 tmp = (vector++ | E1000_IVAR_VALID) << 8;
922 wr32(E1000_IVAR_MISC, tmp);
923 break;
924 default:
925 /* do nothing, since nothing else supports MSI-X */
926 break;
927 } /* switch (hw->mac.type) */
929 adapter->eims_enable_mask |= adapter->eims_other;
931 for (i = 0; i < adapter->num_q_vectors; i++)
932 igb_assign_vector(adapter->q_vector[i], vector++);
934 wrfl();
938 * igb_request_msix - Initialize MSI-X interrupts
940 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
941 * kernel.
943 static int igb_request_msix(struct igb_adapter *adapter)
945 struct net_device *netdev = adapter->netdev;
946 struct e1000_hw *hw = &adapter->hw;
947 int i, err = 0, vector = 0;
949 err = request_irq(adapter->msix_entries[vector].vector,
950 igb_msix_other, 0, netdev->name, adapter);
951 if (err)
952 goto out;
953 vector++;
955 for (i = 0; i < adapter->num_q_vectors; i++) {
956 struct igb_q_vector *q_vector = adapter->q_vector[i];
958 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
960 if (q_vector->rx.ring && q_vector->tx.ring)
961 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
962 q_vector->rx.ring->queue_index);
963 else if (q_vector->tx.ring)
964 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
965 q_vector->tx.ring->queue_index);
966 else if (q_vector->rx.ring)
967 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
968 q_vector->rx.ring->queue_index);
969 else
970 sprintf(q_vector->name, "%s-unused", netdev->name);
972 err = request_irq(adapter->msix_entries[vector].vector,
973 igb_msix_ring, 0, q_vector->name,
974 q_vector);
975 if (err)
976 goto out;
977 vector++;
980 igb_configure_msix(adapter);
981 return 0;
982 out:
983 return err;
986 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
988 if (adapter->msix_entries) {
989 pci_disable_msix(adapter->pdev);
990 kfree(adapter->msix_entries);
991 adapter->msix_entries = NULL;
992 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
993 pci_disable_msi(adapter->pdev);
998 * igb_free_q_vectors - Free memory allocated for interrupt vectors
999 * @adapter: board private structure to initialize
1001 * This function frees the memory allocated to the q_vectors. In addition if
1002 * NAPI is enabled it will delete any references to the NAPI struct prior
1003 * to freeing the q_vector.
1005 static void igb_free_q_vectors(struct igb_adapter *adapter)
1007 int v_idx;
1009 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1010 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1011 adapter->q_vector[v_idx] = NULL;
1012 if (!q_vector)
1013 continue;
1014 netif_napi_del(&q_vector->napi);
1015 kfree(q_vector);
1017 adapter->num_q_vectors = 0;
1021 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1023 * This function resets the device so that it has 0 rx queues, tx queues, and
1024 * MSI-X interrupts allocated.
1026 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1028 igb_free_queues(adapter);
1029 igb_free_q_vectors(adapter);
1030 igb_reset_interrupt_capability(adapter);
1034 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1036 * Attempt to configure interrupts using the best available
1037 * capabilities of the hardware and kernel.
1039 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1041 int err;
1042 int numvecs, i;
1044 /* Number of supported queues. */
1045 adapter->num_rx_queues = adapter->rss_queues;
1046 if (adapter->vfs_allocated_count)
1047 adapter->num_tx_queues = 1;
1048 else
1049 adapter->num_tx_queues = adapter->rss_queues;
1051 /* start with one vector for every rx queue */
1052 numvecs = adapter->num_rx_queues;
1054 /* if tx handler is separate add 1 for every tx queue */
1055 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1056 numvecs += adapter->num_tx_queues;
1058 /* store the number of vectors reserved for queues */
1059 adapter->num_q_vectors = numvecs;
1061 /* add 1 vector for link status interrupts */
1062 numvecs++;
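/*
 * Example: with rss_queues = 4 and IGB_FLAG_QUEUE_PAIRS set this gives
 * numvecs = 4 (rx, tx paired) + 1 (link) = 5 MSI-X vectors; with
 * pairing disabled it would be 4 + 4 + 1 = 9.
 */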
1063 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1064 GFP_KERNEL);
1065 if (!adapter->msix_entries)
1066 goto msi_only;
1068 for (i = 0; i < numvecs; i++)
1069 adapter->msix_entries[i].entry = i;
1071 err = pci_enable_msix(adapter->pdev,
1072 adapter->msix_entries,
1073 numvecs);
1074 if (err == 0)
1075 goto out;
1077 igb_reset_interrupt_capability(adapter);
1079 /* If we can't do MSI-X, try MSI */
1080 msi_only:
1081 #ifdef CONFIG_PCI_IOV
1082 /* disable SR-IOV for non MSI-X configurations */
1083 if (adapter->vf_data) {
1084 struct e1000_hw *hw = &adapter->hw;
1085 /* disable iov and allow time for transactions to clear */
1086 pci_disable_sriov(adapter->pdev);
1087 msleep(500);
1089 kfree(adapter->vf_data);
1090 adapter->vf_data = NULL;
1091 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1092 wrfl();
1093 msleep(100);
1094 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1096 #endif
1097 adapter->vfs_allocated_count = 0;
1098 adapter->rss_queues = 1;
1099 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1100 adapter->num_rx_queues = 1;
1101 adapter->num_tx_queues = 1;
1102 adapter->num_q_vectors = 1;
1103 if (!pci_enable_msi(adapter->pdev))
1104 adapter->flags |= IGB_FLAG_HAS_MSI;
1105 out:
1106 /* Notify the stack of the (possibly) reduced queue counts. */
1107 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1108 return netif_set_real_num_rx_queues(adapter->netdev,
1109 adapter->num_rx_queues);
1113 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1114 * @adapter: board private structure to initialize
1116 * We allocate one q_vector per queue interrupt. If allocation fails we
1117 * return -ENOMEM.
1119 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1121 struct igb_q_vector *q_vector;
1122 struct e1000_hw *hw = &adapter->hw;
1123 int v_idx;
1124 int orig_node = adapter->node;
1126 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1127 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1128 adapter->num_tx_queues)) &&
1129 (adapter->num_rx_queues == v_idx))
1130 adapter->node = orig_node;
1131 if (orig_node == -1) {
1132 int cur_node = next_online_node(adapter->node);
1133 if (cur_node == MAX_NUMNODES)
1134 cur_node = first_online_node;
1135 adapter->node = cur_node;
1137 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1138 adapter->node);
1139 if (!q_vector)
1140 q_vector = kzalloc(sizeof(struct igb_q_vector),
1141 GFP_KERNEL);
1142 if (!q_vector)
1143 goto err_out;
1144 q_vector->adapter = adapter;
1145 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1146 q_vector->itr_val = IGB_START_ITR;
1147 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1148 adapter->q_vector[v_idx] = q_vector;
1150 /* Restore the adapter's original node */
1151 adapter->node = orig_node;
1153 return 0;
1155 err_out:
1156 /* Restore the adapter's original node */
1157 adapter->node = orig_node;
1158 igb_free_q_vectors(adapter);
1159 return -ENOMEM;
1162 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1163 int ring_idx, int v_idx)
1165 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1167 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1168 q_vector->rx.ring->q_vector = q_vector;
1169 q_vector->rx.count++;
1170 q_vector->itr_val = adapter->rx_itr_setting;
1171 if (q_vector->itr_val && q_vector->itr_val <= 3)
1172 q_vector->itr_val = IGB_START_ITR;
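/*
 * itr_val settings of 1-3 appear to select dynamic ITR modes rather
 * than literal intervals (an assumption based on the coalesce handling
 * elsewhere in this driver), hence the fallback to IGB_START_ITR here
 * and in igb_map_tx_ring_to_vector() below.
 */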
1175 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1176 int ring_idx, int v_idx)
1178 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1180 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1181 q_vector->tx.ring->q_vector = q_vector;
1182 q_vector->tx.count++;
1183 q_vector->itr_val = adapter->tx_itr_setting;
1184 q_vector->tx.work_limit = adapter->tx_work_limit;
1185 if (q_vector->itr_val && q_vector->itr_val <= 3)
1186 q_vector->itr_val = IGB_START_ITR;
1190 * igb_map_ring_to_vector - maps allocated queues to vectors
1192 * This function maps the recently allocated queues to vectors.
1194 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1196 int i;
1197 int v_idx = 0;
1199 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1200 (adapter->num_q_vectors < adapter->num_tx_queues))
1201 return -ENOMEM;
1203 if (adapter->num_q_vectors >=
1204 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1205 for (i = 0; i < adapter->num_rx_queues; i++)
1206 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1207 for (i = 0; i < adapter->num_tx_queues; i++)
1208 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1209 } else {
1210 for (i = 0; i < adapter->num_rx_queues; i++) {
1211 if (i < adapter->num_tx_queues)
1212 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1213 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1215 for (; i < adapter->num_tx_queues; i++)
1216 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1218 return 0;
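/*
 * Example: with 4 rx and 4 tx queues and 8 q_vectors, rx rings 0-3 get
 * vectors 0-3 and tx rings 0-3 get vectors 4-7; with only 4 q_vectors,
 * each vector instead services one rx/tx ring pair.
 */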
1222 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1224 * This function initializes the interrupts and allocates all of the queues.
1226 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1228 struct pci_dev *pdev = adapter->pdev;
1229 int err;
1231 err = igb_set_interrupt_capability(adapter);
1232 if (err)
1233 return err;
1235 err = igb_alloc_q_vectors(adapter);
1236 if (err) {
1237 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1238 goto err_alloc_q_vectors;
1241 err = igb_alloc_queues(adapter);
1242 if (err) {
1243 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1244 goto err_alloc_queues;
1247 err = igb_map_ring_to_vector(adapter);
1248 if (err) {
1249 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1250 goto err_map_queues;
1254 return 0;
1255 err_map_queues:
1256 igb_free_queues(adapter);
1257 err_alloc_queues:
1258 igb_free_q_vectors(adapter);
1259 err_alloc_q_vectors:
1260 igb_reset_interrupt_capability(adapter);
1261 return err;
1265 * igb_request_irq - initialize interrupts
1267 * Attempts to configure interrupts using the best available
1268 * capabilities of the hardware and kernel.
1270 static int igb_request_irq(struct igb_adapter *adapter)
1272 struct net_device *netdev = adapter->netdev;
1273 struct pci_dev *pdev = adapter->pdev;
1274 int err = 0;
1276 if (adapter->msix_entries) {
1277 err = igb_request_msix(adapter);
1278 if (!err)
1279 goto request_done;
1280 /* fall back to MSI */
1281 igb_clear_interrupt_scheme(adapter);
1282 if (!pci_enable_msi(pdev))
1283 adapter->flags |= IGB_FLAG_HAS_MSI;
1284 igb_free_all_tx_resources(adapter);
1285 igb_free_all_rx_resources(adapter);
1286 adapter->num_tx_queues = 1;
1287 adapter->num_rx_queues = 1;
1288 adapter->num_q_vectors = 1;
1289 err = igb_alloc_q_vectors(adapter);
1290 if (err) {
1291 dev_err(&pdev->dev,
1292 "Unable to allocate memory for vectors\n");
1293 goto request_done;
1295 err = igb_alloc_queues(adapter);
1296 if (err) {
1297 dev_err(&pdev->dev,
1298 "Unable to allocate memory for queues\n");
1299 igb_free_q_vectors(adapter);
1300 goto request_done;
1302 igb_setup_all_tx_resources(adapter);
1303 igb_setup_all_rx_resources(adapter);
1306 igb_assign_vector(adapter->q_vector[0], 0);
1308 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1309 err = request_irq(pdev->irq, igb_intr_msi, 0,
1310 netdev->name, adapter);
1311 if (!err)
1312 goto request_done;
1314 /* fall back to legacy interrupts */
1315 igb_reset_interrupt_capability(adapter);
1316 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1319 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1320 netdev->name, adapter);
1322 if (err)
1323 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1324 err);
1326 request_done:
1327 return err;
1330 static void igb_free_irq(struct igb_adapter *adapter)
1332 if (adapter->msix_entries) {
1333 int vector = 0, i;
1335 free_irq(adapter->msix_entries[vector++].vector, adapter);
1337 for (i = 0; i < adapter->num_q_vectors; i++)
1338 free_irq(adapter->msix_entries[vector++].vector,
1339 adapter->q_vector[i]);
1340 } else {
1341 free_irq(adapter->pdev->irq, adapter);
1346 * igb_irq_disable - Mask off interrupt generation on the NIC
1347 * @adapter: board private structure
1349 static void igb_irq_disable(struct igb_adapter *adapter)
1351 struct e1000_hw *hw = &adapter->hw;
1354 * we need to be careful when disabling interrupts. The VFs are also
1355 * mapped into these registers and so clearing the bits can cause
1356 * issues on the VF drivers so we only need to clear what we set
1358 if (adapter->msix_entries) {
1359 u32 regval = rd32(E1000_EIAM);
1360 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1361 wr32(E1000_EIMC, adapter->eims_enable_mask);
1362 regval = rd32(E1000_EIAC);
1363 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1366 wr32(E1000_IAM, 0);
1367 wr32(E1000_IMC, ~0);
1368 wrfl();
1369 if (adapter->msix_entries) {
1370 int i;
1371 for (i = 0; i < adapter->num_q_vectors; i++)
1372 synchronize_irq(adapter->msix_entries[i].vector);
1373 } else {
1374 synchronize_irq(adapter->pdev->irq);
1379 * igb_irq_enable - Enable default interrupt generation settings
1380 * @adapter: board private structure
1382 static void igb_irq_enable(struct igb_adapter *adapter)
1384 struct e1000_hw *hw = &adapter->hw;
1386 if (adapter->msix_entries) {
1387 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1388 u32 regval = rd32(E1000_EIAC);
1389 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1390 regval = rd32(E1000_EIAM);
1391 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1392 wr32(E1000_EIMS, adapter->eims_enable_mask);
1393 if (adapter->vfs_allocated_count) {
1394 wr32(E1000_MBVFIMR, 0xFF);
1395 ims |= E1000_IMS_VMMB;
1397 wr32(E1000_IMS, ims);
1398 } else {
1399 wr32(E1000_IMS, IMS_ENABLE_MASK |
1400 E1000_IMS_DRSTA);
1401 wr32(E1000_IAM, IMS_ENABLE_MASK |
1402 E1000_IMS_DRSTA);
1406 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1408 struct e1000_hw *hw = &adapter->hw;
1409 u16 vid = adapter->hw.mng_cookie.vlan_id;
1410 u16 old_vid = adapter->mng_vlan_id;
1412 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1413 /* add VID to filter table */
1414 igb_vfta_set(hw, vid, true);
1415 adapter->mng_vlan_id = vid;
1416 } else {
1417 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1420 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1421 (vid != old_vid) &&
1422 !test_bit(old_vid, adapter->active_vlans)) {
1423 /* remove VID from filter table */
1424 igb_vfta_set(hw, old_vid, false);
1429 * igb_release_hw_control - release control of the h/w to f/w
1430 * @adapter: address of board private structure
1432 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1433 * For ASF and Pass Through versions of f/w this means that the
1434 * driver is no longer loaded.
1437 static void igb_release_hw_control(struct igb_adapter *adapter)
1439 struct e1000_hw *hw = &adapter->hw;
1440 u32 ctrl_ext;
1442 /* Let firmware take over control of h/w */
1443 ctrl_ext = rd32(E1000_CTRL_EXT);
1444 wr32(E1000_CTRL_EXT,
1445 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1449 * igb_get_hw_control - get control of the h/w from f/w
1450 * @adapter: address of board private structure
1452 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1453 * For ASF and Pass Through versions of f/w this means that
1454 * the driver is loaded.
1457 static void igb_get_hw_control(struct igb_adapter *adapter)
1459 struct e1000_hw *hw = &adapter->hw;
1460 u32 ctrl_ext;
1462 /* Let firmware know the driver has taken over */
1463 ctrl_ext = rd32(E1000_CTRL_EXT);
1464 wr32(E1000_CTRL_EXT,
1465 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1469 * igb_configure - configure the hardware for RX and TX
1470 * @adapter: private board structure
1472 static void igb_configure(struct igb_adapter *adapter)
1474 struct net_device *netdev = adapter->netdev;
1475 int i;
1477 igb_get_hw_control(adapter);
1478 igb_set_rx_mode(netdev);
1480 igb_restore_vlan(adapter);
1482 igb_setup_tctl(adapter);
1483 igb_setup_mrqc(adapter);
1484 igb_setup_rctl(adapter);
1486 igb_configure_tx(adapter);
1487 igb_configure_rx(adapter);
1489 igb_rx_fifo_flush_82575(&adapter->hw);
1491 /* call igb_desc_unused which always leaves
1492 * at least 1 descriptor unused to make sure
1493 * next_to_use != next_to_clean */
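/* (If the ring were allowed to fill completely, next_to_use == next_to_clean
 * would be ambiguous between "ring empty" and "ring full".) */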
1494 for (i = 0; i < adapter->num_rx_queues; i++) {
1495 struct igb_ring *ring = adapter->rx_ring[i];
1496 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1501 * igb_power_up_link - Power up the phy/serdes link
1502 * @adapter: address of board private structure
1504 void igb_power_up_link(struct igb_adapter *adapter)
1506 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1507 igb_power_up_phy_copper(&adapter->hw);
1508 else
1509 igb_power_up_serdes_link_82575(&adapter->hw);
1513 * igb_power_down_link - Power down the phy/serdes link
1514 * @adapter: address of board private structure
1516 static void igb_power_down_link(struct igb_adapter *adapter)
1518 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1519 igb_power_down_phy_copper_82575(&adapter->hw);
1520 else
1521 igb_shutdown_serdes_link_82575(&adapter->hw);
1525 * igb_up - Open the interface and prepare it to handle traffic
1526 * @adapter: board private structure
1528 int igb_up(struct igb_adapter *adapter)
1530 struct e1000_hw *hw = &adapter->hw;
1531 int i;
1533 /* hardware has been reset, we need to reload some things */
1534 igb_configure(adapter);
1536 clear_bit(__IGB_DOWN, &adapter->state);
1538 for (i = 0; i < adapter->num_q_vectors; i++)
1539 napi_enable(&(adapter->q_vector[i]->napi));
1541 if (adapter->msix_entries)
1542 igb_configure_msix(adapter);
1543 else
1544 igb_assign_vector(adapter->q_vector[0], 0);
1546 /* Clear any pending interrupts. */
1547 rd32(E1000_ICR);
1548 igb_irq_enable(adapter);
1550 /* notify VFs that reset has been completed */
1551 if (adapter->vfs_allocated_count) {
1552 u32 reg_data = rd32(E1000_CTRL_EXT);
1553 reg_data |= E1000_CTRL_EXT_PFRSTD;
1554 wr32(E1000_CTRL_EXT, reg_data);
1557 netif_tx_start_all_queues(adapter->netdev);
1559 /* start the watchdog. */
1560 hw->mac.get_link_status = 1;
1561 schedule_work(&adapter->watchdog_task);
1563 return 0;
1566 void igb_down(struct igb_adapter *adapter)
1568 struct net_device *netdev = adapter->netdev;
1569 struct e1000_hw *hw = &adapter->hw;
1570 u32 tctl, rctl;
1571 int i;
1573 /* signal that we're down so the interrupt handler does not
1574 * reschedule our watchdog timer */
1575 set_bit(__IGB_DOWN, &adapter->state);
1577 /* disable receives in the hardware */
1578 rctl = rd32(E1000_RCTL);
1579 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1580 /* flush and sleep below */
1582 netif_tx_stop_all_queues(netdev);
1584 /* disable transmits in the hardware */
1585 tctl = rd32(E1000_TCTL);
1586 tctl &= ~E1000_TCTL_EN;
1587 wr32(E1000_TCTL, tctl);
1588 /* flush both disables and wait for them to finish */
1589 wrfl();
1590 msleep(10);
1592 for (i = 0; i < adapter->num_q_vectors; i++)
1593 napi_disable(&(adapter->q_vector[i]->napi));
1595 igb_irq_disable(adapter);
1597 del_timer_sync(&adapter->watchdog_timer);
1598 del_timer_sync(&adapter->phy_info_timer);
1600 netif_carrier_off(netdev);
1602 /* record the stats before reset*/
1603 spin_lock(&adapter->stats64_lock);
1604 igb_update_stats(adapter, &adapter->stats64);
1605 spin_unlock(&adapter->stats64_lock);
1607 adapter->link_speed = 0;
1608 adapter->link_duplex = 0;
1610 if (!pci_channel_offline(adapter->pdev))
1611 igb_reset(adapter);
1612 igb_clean_all_tx_rings(adapter);
1613 igb_clean_all_rx_rings(adapter);
1614 #ifdef CONFIG_IGB_DCA
1616 /* since we reset the hardware DCA settings were cleared */
1617 igb_setup_dca(adapter);
1618 #endif
1621 void igb_reinit_locked(struct igb_adapter *adapter)
1623 WARN_ON(in_interrupt());
1624 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1625 msleep(1);
1626 igb_down(adapter);
1627 igb_up(adapter);
1628 clear_bit(__IGB_RESETTING, &adapter->state);
1631 void igb_reset(struct igb_adapter *adapter)
1633 struct pci_dev *pdev = adapter->pdev;
1634 struct e1000_hw *hw = &adapter->hw;
1635 struct e1000_mac_info *mac = &hw->mac;
1636 struct e1000_fc_info *fc = &hw->fc;
1637 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1638 u16 hwm;
1640 /* Repartition the PBA for MTUs greater than 9k.
1641 * To take effect, CTRL.RST is required.
1643 switch (mac->type) {
1644 case e1000_i350:
1645 case e1000_82580:
1646 pba = rd32(E1000_RXPBS);
1647 pba = igb_rxpbs_adjust_82580(pba);
1648 break;
1649 case e1000_82576:
1650 pba = rd32(E1000_RXPBS);
1651 pba &= E1000_RXPBS_SIZE_MASK_82576;
1652 break;
1653 case e1000_82575:
1654 default:
1655 pba = E1000_PBA_34K;
1656 break;
1659 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1660 (mac->type < e1000_82576)) {
1661 /* adjust PBA for jumbo frames */
1662 wr32(E1000_PBA, pba);
1664 /* To maintain wire speed transmits, the Tx FIFO should be
1665 * large enough to accommodate two full transmit packets,
1666 * rounded up to the next 1KB and expressed in KB. Likewise,
1667 * the Rx FIFO should be large enough to accommodate at least
1668 * one full receive packet and is similarly rounded up and
1669 * expressed in KB. */
1670 pba = rd32(E1000_PBA);
1671 /* upper 16 bits has Tx packet buffer allocation size in KB */
1672 tx_space = pba >> 16;
1673 /* lower 16 bits has Rx packet buffer allocation size in KB */
1674 pba &= 0xffff;
1675 /* the tx fifo also stores 16 bytes of information about the tx
1676 * but don't include ethernet FCS because hardware appends it */
1677 min_tx_space = (adapter->max_frame_size +
1678 sizeof(union e1000_adv_tx_desc) -
1679 ETH_FCS_LEN) * 2;
1680 min_tx_space = ALIGN(min_tx_space, 1024);
1681 min_tx_space >>= 10;
1682 /* software strips receive CRC, so leave room for it */
1683 min_rx_space = adapter->max_frame_size;
1684 min_rx_space = ALIGN(min_rx_space, 1024);
1685 min_rx_space >>= 10;
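/*
 * Example (assuming a 9000-byte MTU, i.e. max_frame_size = 9018, and a
 * 16-byte advanced Tx descriptor): min_tx_space = (9018 + 16 - 4) * 2 =
 * 18060, aligned up to 18432, i.e. 18 KB; min_rx_space =
 * ALIGN(9018, 1024) = 9216, i.e. 9 KB.
 */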
1687 /* If current Tx allocation is less than the min Tx FIFO size,
1688 * and the min Tx FIFO size is less than the current Rx FIFO
1689 * allocation, take space away from current Rx allocation */
1690 if (tx_space < min_tx_space &&
1691 ((min_tx_space - tx_space) < pba)) {
1692 pba = pba - (min_tx_space - tx_space);
1694 /* if short on rx space, rx wins and must trump tx
1695 * adjustment */
1696 if (pba < min_rx_space)
1697 pba = min_rx_space;
1699 wr32(E1000_PBA, pba);
1702 /* flow control settings */
1703 /* The high water mark must be low enough to fit one full frame
1704 * (or the size used for early receive) above it in the Rx FIFO.
1705 * Set it to the lower of:
1706 * - 90% of the Rx FIFO size, or
1707 * - the full Rx FIFO size minus one full frame */
1708 hwm = min(((pba << 10) * 9 / 10),
1709 ((pba << 10) - 2 * adapter->max_frame_size));
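/*
 * Example: with pba = 34 (KB) and max_frame_size = 1522, hwm =
 * min(34816 * 9 / 10, 34816 - 2 * 1522) = min(31334, 31772) = 31334,
 * so fc->high_water below becomes 31334 & 0xFFF0 = 31328 and
 * fc->low_water 31312.
 */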
1711 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1712 fc->low_water = fc->high_water - 16;
1713 fc->pause_time = 0xFFFF;
1714 fc->send_xon = 1;
1715 fc->current_mode = fc->requested_mode;
1717 /* disable receive for all VFs and wait one second */
1718 if (adapter->vfs_allocated_count) {
1719 int i;
1720 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1721 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1723 /* ping all the active vfs to let them know we are going down */
1724 igb_ping_all_vfs(adapter);
1726 /* disable transmits and receives */
1727 wr32(E1000_VFRE, 0);
1728 wr32(E1000_VFTE, 0);
1731 /* Allow time for pending master requests to run */
1732 hw->mac.ops.reset_hw(hw);
1733 wr32(E1000_WUC, 0);
1735 if (hw->mac.ops.init_hw(hw))
1736 dev_err(&pdev->dev, "Hardware Error\n");
1738 igb_init_dmac(adapter, pba);
1739 if (!netif_running(adapter->netdev))
1740 igb_power_down_link(adapter);
1742 igb_update_mng_vlan(adapter);
1744 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1745 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747 igb_get_phy_info(hw);
1750 static netdev_features_t igb_fix_features(struct net_device *netdev,
1751 netdev_features_t features)
1754 * Since there is no support for separate rx/tx vlan accel
1755 * enable/disable make sure tx flag is always in same state as rx.
1757 if (features & NETIF_F_HW_VLAN_RX)
1758 features |= NETIF_F_HW_VLAN_TX;
1759 else
1760 features &= ~NETIF_F_HW_VLAN_TX;
1762 return features;
1765 static int igb_set_features(struct net_device *netdev,
1766 netdev_features_t features)
1768 netdev_features_t changed = netdev->features ^ features;
1770 if (changed & NETIF_F_HW_VLAN_RX)
1771 igb_vlan_mode(netdev, features);
1773 return 0;
1776 static const struct net_device_ops igb_netdev_ops = {
1777 .ndo_open = igb_open,
1778 .ndo_stop = igb_close,
1779 .ndo_start_xmit = igb_xmit_frame,
1780 .ndo_get_stats64 = igb_get_stats64,
1781 .ndo_set_rx_mode = igb_set_rx_mode,
1782 .ndo_set_mac_address = igb_set_mac,
1783 .ndo_change_mtu = igb_change_mtu,
1784 .ndo_do_ioctl = igb_ioctl,
1785 .ndo_tx_timeout = igb_tx_timeout,
1786 .ndo_validate_addr = eth_validate_addr,
1787 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1788 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1789 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1790 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1791 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1792 .ndo_get_vf_config = igb_ndo_get_vf_config,
1793 #ifdef CONFIG_NET_POLL_CONTROLLER
1794 .ndo_poll_controller = igb_netpoll,
1795 #endif
1796 .ndo_fix_features = igb_fix_features,
1797 .ndo_set_features = igb_set_features,
1801 * igb_probe - Device Initialization Routine
1802 * @pdev: PCI device information struct
1803 * @ent: entry in igb_pci_tbl
1805 * Returns 0 on success, negative on failure
1807 * igb_probe initializes an adapter identified by a pci_dev structure.
1808 * The OS initialization, configuring of the adapter private structure,
1809 * and a hardware reset occur.
1811 static int __devinit igb_probe(struct pci_dev *pdev,
1812 const struct pci_device_id *ent)
1814 struct net_device *netdev;
1815 struct igb_adapter *adapter;
1816 struct e1000_hw *hw;
1817 u16 eeprom_data = 0;
1818 s32 ret_val;
1819 static int global_quad_port_a; /* global quad port a indication */
1820 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1821 unsigned long mmio_start, mmio_len;
1822 int err, pci_using_dac;
1823 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1824 u8 part_str[E1000_PBANUM_LENGTH];
1826 /* Catch broken hardware that put the wrong VF device ID in
1827 * the PCIe SR-IOV capability.
1829 if (pdev->is_virtfn) {
1830 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1831 pci_name(pdev), pdev->vendor, pdev->device);
1832 return -EINVAL;
1835 err = pci_enable_device_mem(pdev);
1836 if (err)
1837 return err;
1839 pci_using_dac = 0;
1840 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1841 if (!err) {
1842 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1843 if (!err)
1844 pci_using_dac = 1;
1845 } else {
1846 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1847 if (err) {
1848 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1849 if (err) {
1850 dev_err(&pdev->dev, "No usable DMA "
1851 "configuration, aborting\n");
1852 goto err_dma;
1857 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1858 IORESOURCE_MEM),
1859 igb_driver_name);
1860 if (err)
1861 goto err_pci_reg;
1863 pci_enable_pcie_error_reporting(pdev);
1865 pci_set_master(pdev);
1866 pci_save_state(pdev);
1868 err = -ENOMEM;
1869 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1870 IGB_MAX_TX_QUEUES);
1871 if (!netdev)
1872 goto err_alloc_etherdev;
1874 SET_NETDEV_DEV(netdev, &pdev->dev);
1876 pci_set_drvdata(pdev, netdev);
1877 adapter = netdev_priv(netdev);
1878 adapter->netdev = netdev;
1879 adapter->pdev = pdev;
1880 hw = &adapter->hw;
1881 hw->back = adapter;
1882 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1884 mmio_start = pci_resource_start(pdev, 0);
1885 mmio_len = pci_resource_len(pdev, 0);
1887 err = -EIO;
1888 hw->hw_addr = ioremap(mmio_start, mmio_len);
1889 if (!hw->hw_addr)
1890 goto err_ioremap;
1892 netdev->netdev_ops = &igb_netdev_ops;
1893 igb_set_ethtool_ops(netdev);
1894 netdev->watchdog_timeo = 5 * HZ;
1896 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1898 netdev->mem_start = mmio_start;
1899 netdev->mem_end = mmio_start + mmio_len;
1901 /* PCI config space info */
1902 hw->vendor_id = pdev->vendor;
1903 hw->device_id = pdev->device;
1904 hw->revision_id = pdev->revision;
1905 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1906 hw->subsystem_device_id = pdev->subsystem_device;
1908 /* Copy the default MAC, PHY and NVM function pointers */
1909 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1910 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1911 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1912 /* Initialize skew-specific constants */
1913 err = ei->get_invariants(hw);
1914 if (err)
1915 goto err_sw_init;
1917 /* setup the private structure */
1918 err = igb_sw_init(adapter);
1919 if (err)
1920 goto err_sw_init;
1922 igb_get_bus_info_pcie(hw);
1924 hw->phy.autoneg_wait_to_complete = false;
1926 /* Copper options */
1927 if (hw->phy.media_type == e1000_media_type_copper) {
1928 hw->phy.mdix = AUTO_ALL_MODES;
1929 hw->phy.disable_polarity_correction = false;
1930 hw->phy.ms_type = e1000_ms_hw_default;
1933 if (igb_check_reset_block(hw))
1934 dev_info(&pdev->dev,
1935 "PHY reset is blocked due to SOL/IDER session.\n");
1938 * features is initialized to 0 at allocation, but it may already have
1939 * bits set by igb_sw_init, so use an OR here instead of an
1940 * assignment.
1942 netdev->features |= NETIF_F_SG |
1943 NETIF_F_IP_CSUM |
1944 NETIF_F_IPV6_CSUM |
1945 NETIF_F_TSO |
1946 NETIF_F_TSO6 |
1947 NETIF_F_RXHASH |
1948 NETIF_F_RXCSUM |
1949 NETIF_F_HW_VLAN_RX |
1950 NETIF_F_HW_VLAN_TX;
1952 /* copy netdev features into list of user selectable features */
1953 netdev->hw_features |= netdev->features;
1955 /* set this bit last since it cannot be part of hw_features */
1956 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1958 netdev->vlan_features |= NETIF_F_TSO |
1959 NETIF_F_TSO6 |
1960 NETIF_F_IP_CSUM |
1961 NETIF_F_IPV6_CSUM |
1962 NETIF_F_SG;
1964 if (pci_using_dac) {
1965 netdev->features |= NETIF_F_HIGHDMA;
1966 netdev->vlan_features |= NETIF_F_HIGHDMA;
1969 if (hw->mac.type >= e1000_82576) {
1970 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1971 netdev->features |= NETIF_F_SCTP_CSUM;
1974 netdev->priv_flags |= IFF_UNICAST_FLT;
1976 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1978 /* before reading the NVM, reset the controller to put the device in a
1979 * known good starting state */
1980 hw->mac.ops.reset_hw(hw);
1982 /* make sure the NVM is good */
1983 if (hw->nvm.ops.validate(hw) < 0) {
1984 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1985 err = -EIO;
1986 goto err_eeprom;
1989 /* copy the MAC address out of the NVM */
1990 if (hw->mac.ops.read_mac_addr(hw))
1991 dev_err(&pdev->dev, "NVM Read Error\n");
1993 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1994 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1996 if (!is_valid_ether_addr(netdev->perm_addr)) {
1997 dev_err(&pdev->dev, "Invalid MAC Address\n");
1998 err = -EIO;
1999 goto err_eeprom;
2002 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2003 (unsigned long) adapter);
2004 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2005 (unsigned long) adapter);
2007 INIT_WORK(&adapter->reset_task, igb_reset_task);
2008 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2010 /* Initialize link properties that are user-changeable */
2011 adapter->fc_autoneg = true;
2012 hw->mac.autoneg = true;
2013 hw->phy.autoneg_advertised = 0x2f;
2015 hw->fc.requested_mode = e1000_fc_default;
2016 hw->fc.current_mode = e1000_fc_default;
2018 igb_validate_mdi_setting(hw);
2020 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
2021 * enable the ACPI Magic Packet filter
2024 if (hw->bus.func == 0)
2025 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2026 else if (hw->mac.type >= e1000_82580)
2027 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2028 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2029 &eeprom_data);
2030 else if (hw->bus.func == 1)
2031 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
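/* eeprom_data now holds the INIT_CONTROL3 word for this LAN function;
 * its APME bit decides below whether Magic Packet wakeup is enabled.
 */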
2033 if (eeprom_data & eeprom_apme_mask)
2034 adapter->eeprom_wol |= E1000_WUFC_MAG;
2036 /* now that we have the eeprom settings, apply the special cases where
2037 * the eeprom may be wrong or the board simply won't support wake on
2038 * lan on a particular port */
2039 switch (pdev->device) {
2040 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2041 adapter->eeprom_wol = 0;
2042 break;
2043 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2044 case E1000_DEV_ID_82576_FIBER:
2045 case E1000_DEV_ID_82576_SERDES:
2046 /* Wake events only supported on port A for dual fiber
2047 * regardless of eeprom setting */
2048 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2049 adapter->eeprom_wol = 0;
2050 break;
2051 case E1000_DEV_ID_82576_QUAD_COPPER:
2052 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2053 /* if quad port adapter, disable WoL on all but port A */
2054 if (global_quad_port_a != 0)
2055 adapter->eeprom_wol = 0;
2056 else
2057 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2058 /* Reset for multiple quad port adapters */
2059 if (++global_quad_port_a == 4)
2060 global_quad_port_a = 0;
2061 break;
2064 /* initialize the wol settings based on the eeprom settings */
2065 adapter->wol = adapter->eeprom_wol;
2066 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
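/* Tell the PM core whether this port may wake the system; the system
 * and runtime suspend paths presumably consult this (together with
 * adapter->wol) when deciding which wake filters to program.
 */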
2068 /* reset the hardware with the new settings */
2069 igb_reset(adapter);
2071 /* let the f/w know that the h/w is now under the control of the
2072 * driver. */
2073 igb_get_hw_control(adapter);
2075 strcpy(netdev->name, "eth%d");
2076 err = register_netdev(netdev);
2077 if (err)
2078 goto err_register;
2080 /* carrier off reporting is important to ethtool even BEFORE open */
2081 netif_carrier_off(netdev);
2083 #ifdef CONFIG_IGB_DCA
2084 if (dca_add_requester(&pdev->dev) == 0) {
2085 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2086 dev_info(&pdev->dev, "DCA enabled\n");
2087 igb_setup_dca(adapter);
2090 #endif
2091 /* do hw tstamp init after resetting */
2092 igb_init_hw_timer(adapter);
2094 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2095 /* print bus type/speed/width info */
2096 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2097 netdev->name,
2098 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2099 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2100 "unknown"),
2101 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2102 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2103 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2104 "unknown"),
2105 netdev->dev_addr);
2107 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2108 if (ret_val)
2109 strcpy(part_str, "Unknown");
2110 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2111 dev_info(&pdev->dev,
2112 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2113 adapter->msix_entries ? "MSI-X" :
2114 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2115 adapter->num_rx_queues, adapter->num_tx_queues);
2116 switch (hw->mac.type) {
2117 case e1000_i350:
2118 igb_set_eee_i350(hw);
2119 break;
2120 default:
2121 break;
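/* The PCI core takes a runtime PM usage reference before calling probe;
 * dropping it here with a _noidle put (no immediate suspend attempt)
 * is what later allows the device to be runtime-suspended once idle.
 * The matching pm_runtime_get_noresume() is in igb_remove().
 */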
2124 pm_runtime_put_noidle(&pdev->dev);
2125 return 0;
2127 err_register:
2128 igb_release_hw_control(adapter);
2129 err_eeprom:
2130 if (!igb_check_reset_block(hw))
2131 igb_reset_phy(hw);
2133 if (hw->flash_address)
2134 iounmap(hw->flash_address);
2135 err_sw_init:
2136 igb_clear_interrupt_scheme(adapter);
2137 iounmap(hw->hw_addr);
2138 err_ioremap:
2139 free_netdev(netdev);
2140 err_alloc_etherdev:
2141 pci_release_selected_regions(pdev,
2142 pci_select_bars(pdev, IORESOURCE_MEM));
2143 err_pci_reg:
2144 err_dma:
2145 pci_disable_device(pdev);
2146 return err;
2150 * igb_remove - Device Removal Routine
2151 * @pdev: PCI device information struct
2153 * igb_remove is called by the PCI subsystem to alert the driver
2154 * that it should release a PCI device. This could be caused by a
2155 * Hot-Plug event, or because the driver is going to be removed from
2156 * memory.
2158 static void __devexit igb_remove(struct pci_dev *pdev)
2160 struct net_device *netdev = pci_get_drvdata(pdev);
2161 struct igb_adapter *adapter = netdev_priv(netdev);
2162 struct e1000_hw *hw = &adapter->hw;
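/* Re-take the runtime PM reference dropped by pm_runtime_put_noidle()
 * in igb_probe() so the device cannot be runtime-suspended while it is
 * being torn down.
 */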
2164 pm_runtime_get_noresume(&pdev->dev);
2167 * The watchdog timer may be rescheduled, so explicitly
2168 * disable it from being rescheduled.
2170 set_bit(__IGB_DOWN, &adapter->state);
2171 del_timer_sync(&adapter->watchdog_timer);
2172 del_timer_sync(&adapter->phy_info_timer);
2174 cancel_work_sync(&adapter->reset_task);
2175 cancel_work_sync(&adapter->watchdog_task);
2177 #ifdef CONFIG_IGB_DCA
2178 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2179 dev_info(&pdev->dev, "DCA disabled\n");
2180 dca_remove_requester(&pdev->dev);
2181 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2182 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2184 #endif
2186 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2187 * would have already happened in close and is redundant. */
2188 igb_release_hw_control(adapter);
2190 unregister_netdev(netdev);
2192 igb_clear_interrupt_scheme(adapter);
2194 #ifdef CONFIG_PCI_IOV
2195 /* reclaim resources allocated to VFs */
2196 if (adapter->vf_data) {
2197 /* disable iov and allow time for transactions to clear */
2198 if (!igb_check_vf_assignment(adapter)) {
2199 pci_disable_sriov(pdev);
2200 msleep(500);
2201 } else {
2202 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2205 kfree(adapter->vf_data);
2206 adapter->vf_data = NULL;
2207 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2208 wrfl();
2209 msleep(100);
2210 dev_info(&pdev->dev, "IOV Disabled\n");
2212 #endif
2214 iounmap(hw->hw_addr);
2215 if (hw->flash_address)
2216 iounmap(hw->flash_address);
2217 pci_release_selected_regions(pdev,
2218 pci_select_bars(pdev, IORESOURCE_MEM));
2220 kfree(adapter->shadow_vfta);
2221 free_netdev(netdev);
2223 pci_disable_pcie_error_reporting(pdev);
2225 pci_disable_device(pdev);
2229 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2230 * @adapter: board private structure to initialize
2232 * This function initializes the vf specific data storage and then attempts to
2233 * allocate the VFs. The reason for this ordering is that it is much
2234 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2235 * the memory for the VFs.
2237 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2239 #ifdef CONFIG_PCI_IOV
2240 struct pci_dev *pdev = adapter->pdev;
2241 int old_vfs = igb_find_enabled_vfs(adapter);
2242 int i;
2244 if (old_vfs) {
2245 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2246 "max_vfs setting of %d\n", old_vfs, max_vfs);
2247 adapter->vfs_allocated_count = old_vfs;
2250 if (!adapter->vfs_allocated_count)
2251 return;
2253 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2254 sizeof(struct vf_data_storage), GFP_KERNEL);
2255 /* if allocation failed then we do not support SR-IOV */
2256 if (!adapter->vf_data) {
2257 adapter->vfs_allocated_count = 0;
2258 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2259 "Data Storage\n");
2260 goto out;
2263 if (!old_vfs) {
2264 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2265 goto err_out;
2267 dev_info(&pdev->dev, "%d VFs allocated\n",
2268 adapter->vfs_allocated_count);
2269 for (i = 0; i < adapter->vfs_allocated_count; i++)
2270 igb_vf_configure(adapter, i);
2272 /* DMA Coalescing is not supported in IOV mode. */
2273 adapter->flags &= ~IGB_FLAG_DMAC;
2274 goto out;
2275 err_out:
2276 kfree(adapter->vf_data);
2277 adapter->vf_data = NULL;
2278 adapter->vfs_allocated_count = 0;
2279 out:
2280 return;
2281 #endif /* CONFIG_PCI_IOV */
2285 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2286 * @adapter: board private structure to initialize
2288 * igb_init_hw_timer initializes the function pointers and values for the
2289 * hardware timer.
2291 static void igb_init_hw_timer(struct igb_adapter *adapter)
2293 struct e1000_hw *hw = &adapter->hw;
2295 switch (hw->mac.type) {
2296 case e1000_i350:
2297 case e1000_82580:
2298 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2299 adapter->cycles.read = igb_read_clock;
2300 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2301 adapter->cycles.mult = 1;
2303 * The 82580 timesync updates the system timer by 8 ns every 8 ns,
2304 * and the value cannot be shifted. Instead we need to shift
2305 * the registers to generate a 64bit timer value. As a result
2306 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2307 * 24 in order to generate a larger value for synchronization.
2309 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2310 /* disable system timer temporarily by setting bit 31 */
2311 wr32(E1000_TSAUXC, 0x80000000);
2312 wrfl();
2314 /* Set registers so that rollover occurs soon to test this. */
2315 wr32(E1000_SYSTIMR, 0x00000000);
2316 wr32(E1000_SYSTIML, 0x80000000);
2317 wr32(E1000_SYSTIMH, 0x000000FF);
2318 wrfl();
2320 /* enable system timer by clearing bit 31 */
2321 wr32(E1000_TSAUXC, 0x0);
2322 wrfl();
2324 timecounter_init(&adapter->clock,
2325 &adapter->cycles,
2326 ktime_to_ns(ktime_get_real()));
2328 * Synchronize our NIC clock against the system wall clock. NIC
2329 * time stamp reading requires ~3us per sample, and each sample
2330 * was quite stable even under load, so only 10
2331 * samples are required for each offset comparison.
2333 memset(&adapter->compare, 0, sizeof(adapter->compare));
2334 adapter->compare.source = &adapter->clock;
2335 adapter->compare.target = ktime_get_real;
2336 adapter->compare.num_samples = 10;
2337 timecompare_update(&adapter->compare, 0);
2338 break;
2339 case e1000_82576:
2341 * Initialize hardware timer: we keep it running just in case
2342 * that some program needs it later on.
2344 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2345 adapter->cycles.read = igb_read_clock;
2346 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2347 adapter->cycles.mult = 1;
2349 * Scale the NIC clock cycle by a large factor so that
2350 * relatively small clock corrections can be added or
2351 * subtracted at each clock tick. The drawbacks of a large
2352 * factor are a) that the clock register overflows more quickly
2353 * (not such a big deal) and b) that the increment per tick has
2354 * to fit into 24 bits. As a result we need to use a shift of
2355 * 19 so we can fit a value of 16 into the TIMINCA register.
2357 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2358 wr32(E1000_TIMINCA,
2359 (1 << E1000_TIMINCA_16NS_SHIFT) |
2360 (16 << IGB_82576_TSYNC_SHIFT));
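/* With the shift of 19 and increment value of 16 described above this
 * programs 16 << 19 = 0x800000, which just fits the 24-bit field.
 */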
2362 /* Set registers so that rollover occurs soon to test this. */
2363 wr32(E1000_SYSTIML, 0x00000000);
2364 wr32(E1000_SYSTIMH, 0xFF800000);
2365 wrfl();
2367 timecounter_init(&adapter->clock,
2368 &adapter->cycles,
2369 ktime_to_ns(ktime_get_real()));
2371 * Synchronize our NIC clock against the system wall clock. NIC
2372 * time stamp reading requires ~3us per sample, and each sample
2373 * was quite stable even under load, so only 10
2374 * samples are required for each offset comparison.
2376 memset(&adapter->compare, 0, sizeof(adapter->compare));
2377 adapter->compare.source = &adapter->clock;
2378 adapter->compare.target = ktime_get_real;
2379 adapter->compare.num_samples = 10;
2380 timecompare_update(&adapter->compare, 0);
2381 break;
2382 case e1000_82575:
2383 /* 82575 does not support timesync */
2384 default:
2385 break;
2391 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2392 * @adapter: board private structure to initialize
2394 * igb_sw_init initializes the Adapter private data structure.
2395 * Fields are initialized based on PCI device information and
2396 * OS network device settings (MTU size).
2398 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2400 struct e1000_hw *hw = &adapter->hw;
2401 struct net_device *netdev = adapter->netdev;
2402 struct pci_dev *pdev = adapter->pdev;
2404 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2406 /* set default ring sizes */
2407 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2408 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2410 /* set default ITR values */
2411 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2412 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2414 /* set default work limits */
2415 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2417 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2418 VLAN_HLEN;
2419 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2421 adapter->node = -1;
2423 spin_lock_init(&adapter->stats64_lock);
2424 #ifdef CONFIG_PCI_IOV
2425 switch (hw->mac.type) {
2426 case e1000_82576:
2427 case e1000_i350:
2428 if (max_vfs > 7) {
2429 dev_warn(&pdev->dev,
2430 "Maximum of 7 VFs per PF, using max\n");
2431 adapter->vfs_allocated_count = 7;
2432 } else
2433 adapter->vfs_allocated_count = max_vfs;
2434 break;
2435 default:
2436 break;
2438 #endif /* CONFIG_PCI_IOV */
2439 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2440 /* i350 cannot do RSS and SR-IOV at the same time */
2441 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2442 adapter->rss_queues = 1;
2445 * If rss_queues > 4, or VFs are going to be allocated while multiple
2446 * rss_queues are in use, combine the queues into queue pairs in order
2447 * to conserve the limited supply of interrupt vectors.
2449 if ((adapter->rss_queues > 4) ||
2450 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2451 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2453 /* Setup and initialize a copy of the hw vlan table array */
2454 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2455 E1000_VLAN_FILTER_TBL_SIZE,
2456 GFP_ATOMIC);
2458 /* This call may decrease the number of queues */
2459 if (igb_init_interrupt_scheme(adapter)) {
2460 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2461 return -ENOMEM;
2464 igb_probe_vfs(adapter);
2466 /* Explicitly disable IRQ since the NIC can be in any state. */
2467 igb_irq_disable(adapter);
2469 if (hw->mac.type == e1000_i350)
2470 adapter->flags &= ~IGB_FLAG_DMAC;
2472 set_bit(__IGB_DOWN, &adapter->state);
2473 return 0;
2477 * igb_open - Called when a network interface is made active
2478 * @netdev: network interface device structure
2480 * Returns 0 on success, negative value on failure
2482 * The open entry point is called when a network interface is made
2483 * active by the system (IFF_UP). At this point all resources needed
2484 * for transmit and receive operations are allocated, the interrupt
2485 * handler is registered with the OS, the watchdog timer is started,
2486 * and the stack is notified that the interface is ready.
2488 static int __igb_open(struct net_device *netdev, bool resuming)
2490 struct igb_adapter *adapter = netdev_priv(netdev);
2491 struct e1000_hw *hw = &adapter->hw;
2492 struct pci_dev *pdev = adapter->pdev;
2493 int err;
2494 int i;
2496 /* disallow open during test */
2497 if (test_bit(__IGB_TESTING, &adapter->state)) {
2498 WARN_ON(resuming);
2499 return -EBUSY;
2502 if (!resuming)
2503 pm_runtime_get_sync(&pdev->dev);
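/* On the normal ndo_open path (resuming == false) take a runtime PM
 * reference so the device stays resumed while the interface is up.
 * When called from the PM resume callbacks (resuming == true) the
 * reference handling is done by the PM core, so it is skipped here.
 */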
2505 netif_carrier_off(netdev);
2507 /* allocate transmit descriptors */
2508 err = igb_setup_all_tx_resources(adapter);
2509 if (err)
2510 goto err_setup_tx;
2512 /* allocate receive descriptors */
2513 err = igb_setup_all_rx_resources(adapter);
2514 if (err)
2515 goto err_setup_rx;
2517 igb_power_up_link(adapter);
2519 /* before we allocate an interrupt, we must be ready to handle it.
2520 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2521 * as soon as we call pci_request_irq, so we have to set up our
2522 * clean_rx handler before we do so. */
2523 igb_configure(adapter);
2525 err = igb_request_irq(adapter);
2526 if (err)
2527 goto err_req_irq;
2529 /* From here on the code is the same as igb_up() */
2530 clear_bit(__IGB_DOWN, &adapter->state);
2532 for (i = 0; i < adapter->num_q_vectors; i++)
2533 napi_enable(&(adapter->q_vector[i]->napi));
2535 /* Clear any pending interrupts. */
2536 rd32(E1000_ICR);
2538 igb_irq_enable(adapter);
2540 /* notify VFs that reset has been completed */
2541 if (adapter->vfs_allocated_count) {
2542 u32 reg_data = rd32(E1000_CTRL_EXT);
2543 reg_data |= E1000_CTRL_EXT_PFRSTD;
2544 wr32(E1000_CTRL_EXT, reg_data);
2547 netif_tx_start_all_queues(netdev);
2549 if (!resuming)
2550 pm_runtime_put(&pdev->dev);
2552 /* start the watchdog. */
2553 hw->mac.get_link_status = 1;
2554 schedule_work(&adapter->watchdog_task);
2556 return 0;
2558 err_req_irq:
2559 igb_release_hw_control(adapter);
2560 igb_power_down_link(adapter);
2561 igb_free_all_rx_resources(adapter);
2562 err_setup_rx:
2563 igb_free_all_tx_resources(adapter);
2564 err_setup_tx:
2565 igb_reset(adapter);
2566 if (!resuming)
2567 pm_runtime_put(&pdev->dev);
2569 return err;
2572 static int igb_open(struct net_device *netdev)
2574 return __igb_open(netdev, false);
2578 * igb_close - Disables a network interface
2579 * @netdev: network interface device structure
2581 * Returns 0, this is not allowed to fail
2583 * The close entry point is called when an interface is de-activated
2584 * by the OS. The hardware is still under the driver's control, but
2585 * needs to be disabled. A global MAC reset is issued to stop the
2586 * hardware, and all transmit and receive resources are freed.
2588 static int __igb_close(struct net_device *netdev, bool suspending)
2590 struct igb_adapter *adapter = netdev_priv(netdev);
2591 struct pci_dev *pdev = adapter->pdev;
2593 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2595 if (!suspending)
2596 pm_runtime_get_sync(&pdev->dev);
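/* Mirror of __igb_open(): hold a runtime PM reference while the
 * hardware is being shut down, unless we were called from a PM suspend
 * callback (suspending == true), in which case the PM core handles it.
 */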
2598 igb_down(adapter);
2599 igb_free_irq(adapter);
2601 igb_free_all_tx_resources(adapter);
2602 igb_free_all_rx_resources(adapter);
2604 if (!suspending)
2605 pm_runtime_put_sync(&pdev->dev);
2606 return 0;
2609 static int igb_close(struct net_device *netdev)
2611 return __igb_close(netdev, false);
2615 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2616 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2618 * Return 0 on success, negative on failure
2620 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2622 struct device *dev = tx_ring->dev;
2623 int orig_node = dev_to_node(dev);
2624 int size;
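/* The allocations below prefer the ring's NUMA node (vzalloc_node() and
 * the temporary set_dev_node() around dma_alloc_coherent()) and fall
 * back to any node if the node-local attempt fails.
 */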
2626 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2627 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2628 if (!tx_ring->tx_buffer_info)
2629 tx_ring->tx_buffer_info = vzalloc(size);
2630 if (!tx_ring->tx_buffer_info)
2631 goto err;
2633 /* round up to nearest 4K */
2634 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2635 tx_ring->size = ALIGN(tx_ring->size, 4096);
2637 set_dev_node(dev, tx_ring->numa_node);
2638 tx_ring->desc = dma_alloc_coherent(dev,
2639 tx_ring->size,
2640 &tx_ring->dma,
2641 GFP_KERNEL);
2642 set_dev_node(dev, orig_node);
2643 if (!tx_ring->desc)
2644 tx_ring->desc = dma_alloc_coherent(dev,
2645 tx_ring->size,
2646 &tx_ring->dma,
2647 GFP_KERNEL);
2649 if (!tx_ring->desc)
2650 goto err;
2652 tx_ring->next_to_use = 0;
2653 tx_ring->next_to_clean = 0;
2655 return 0;
2657 err:
2658 vfree(tx_ring->tx_buffer_info);
2659 dev_err(dev,
2660 "Unable to allocate memory for the transmit descriptor ring\n");
2661 return -ENOMEM;
2665 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2666 * (Descriptors) for all queues
2667 * @adapter: board private structure
2669 * Return 0 on success, negative on failure
2671 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2673 struct pci_dev *pdev = adapter->pdev;
2674 int i, err = 0;
2676 for (i = 0; i < adapter->num_tx_queues; i++) {
2677 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2678 if (err) {
2679 dev_err(&pdev->dev,
2680 "Allocation for Tx Queue %u failed\n", i);
2681 for (i--; i >= 0; i--)
2682 igb_free_tx_resources(adapter->tx_ring[i]);
2683 break;
2687 return err;
2691 * igb_setup_tctl - configure the transmit control registers
2692 * @adapter: Board private structure
2694 void igb_setup_tctl(struct igb_adapter *adapter)
2696 struct e1000_hw *hw = &adapter->hw;
2697 u32 tctl;
2699 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2700 wr32(E1000_TXDCTL(0), 0);
2702 /* Program the Transmit Control Register */
2703 tctl = rd32(E1000_TCTL);
2704 tctl &= ~E1000_TCTL_CT;
2705 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2706 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2708 igb_config_collision_dist(hw);
2710 /* Enable transmits */
2711 tctl |= E1000_TCTL_EN;
2713 wr32(E1000_TCTL, tctl);
2717 * igb_configure_tx_ring - Configure transmit ring after Reset
2718 * @adapter: board private structure
2719 * @ring: tx ring to configure
2721 * Configure a transmit ring after a reset.
2723 void igb_configure_tx_ring(struct igb_adapter *adapter,
2724 struct igb_ring *ring)
2726 struct e1000_hw *hw = &adapter->hw;
2727 u32 txdctl = 0;
2728 u64 tdba = ring->dma;
2729 int reg_idx = ring->reg_idx;
2731 /* disable the queue */
2732 wr32(E1000_TXDCTL(reg_idx), 0);
2733 wrfl();
2734 mdelay(10);
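/* The 10ms delay above presumably gives the hardware time to finish any
 * in-flight descriptor fetches after the queue was disabled, before the
 * ring registers are rewritten below.
 */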
2736 wr32(E1000_TDLEN(reg_idx),
2737 ring->count * sizeof(union e1000_adv_tx_desc));
2738 wr32(E1000_TDBAL(reg_idx),
2739 tdba & 0x00000000ffffffffULL);
2740 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2742 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2743 wr32(E1000_TDH(reg_idx), 0);
2744 writel(0, ring->tail);
2746 txdctl |= IGB_TX_PTHRESH;
2747 txdctl |= IGB_TX_HTHRESH << 8;
2748 txdctl |= IGB_TX_WTHRESH << 16;
2750 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2751 wr32(E1000_TXDCTL(reg_idx), txdctl);
2755 * igb_configure_tx - Configure transmit Unit after Reset
2756 * @adapter: board private structure
2758 * Configure the Tx unit of the MAC after a reset.
2760 static void igb_configure_tx(struct igb_adapter *adapter)
2762 int i;
2764 for (i = 0; i < adapter->num_tx_queues; i++)
2765 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2769 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2770 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2772 * Returns 0 on success, negative on failure
2774 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2776 struct device *dev = rx_ring->dev;
2777 int orig_node = dev_to_node(dev);
2778 int size, desc_len;
2780 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2781 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2782 if (!rx_ring->rx_buffer_info)
2783 rx_ring->rx_buffer_info = vzalloc(size);
2784 if (!rx_ring->rx_buffer_info)
2785 goto err;
2787 desc_len = sizeof(union e1000_adv_rx_desc);
2789 /* Round up to nearest 4K */
2790 rx_ring->size = rx_ring->count * desc_len;
2791 rx_ring->size = ALIGN(rx_ring->size, 4096);
2793 set_dev_node(dev, rx_ring->numa_node);
2794 rx_ring->desc = dma_alloc_coherent(dev,
2795 rx_ring->size,
2796 &rx_ring->dma,
2797 GFP_KERNEL);
2798 set_dev_node(dev, orig_node);
2799 if (!rx_ring->desc)
2800 rx_ring->desc = dma_alloc_coherent(dev,
2801 rx_ring->size,
2802 &rx_ring->dma,
2803 GFP_KERNEL);
2805 if (!rx_ring->desc)
2806 goto err;
2808 rx_ring->next_to_clean = 0;
2809 rx_ring->next_to_use = 0;
2811 return 0;
2813 err:
2814 vfree(rx_ring->rx_buffer_info);
2815 rx_ring->rx_buffer_info = NULL;
2816 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2817 " ring\n");
2818 return -ENOMEM;
2822 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2823 * (Descriptors) for all queues
2824 * @adapter: board private structure
2826 * Return 0 on success, negative on failure
2828 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2830 struct pci_dev *pdev = adapter->pdev;
2831 int i, err = 0;
2833 for (i = 0; i < adapter->num_rx_queues; i++) {
2834 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2835 if (err) {
2836 dev_err(&pdev->dev,
2837 "Allocation for Rx Queue %u failed\n", i);
2838 for (i--; i >= 0; i--)
2839 igb_free_rx_resources(adapter->rx_ring[i]);
2840 break;
2844 return err;
2848 * igb_setup_mrqc - configure the multiple receive queue control registers
2849 * @adapter: Board private structure
2851 static void igb_setup_mrqc(struct igb_adapter *adapter)
2853 struct e1000_hw *hw = &adapter->hw;
2854 u32 mrqc, rxcsum;
2855 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2856 union e1000_reta {
2857 u32 dword;
2858 u8 bytes[4];
2859 } reta;
2860 static const u8 rsshash[40] = {
2861 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2862 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2863 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2864 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2866 /* Fill out hash function seeds */
2867 for (j = 0; j < 10; j++) {
2868 u32 rsskey = rsshash[(j * 4)];
2869 rsskey |= rsshash[(j * 4) + 1] << 8;
2870 rsskey |= rsshash[(j * 4) + 2] << 16;
2871 rsskey |= rsshash[(j * 4) + 3] << 24;
2872 array_wr32(E1000_RSSRK(0), j, rsskey);
2875 num_rx_queues = adapter->rss_queues;
2877 if (adapter->vfs_allocated_count) {
2878 /* 82575 and 82576 support 2 RSS queues for VMDq */
2879 switch (hw->mac.type) {
2880 case e1000_i350:
2881 case e1000_82580:
2882 num_rx_queues = 1;
2883 shift = 0;
2884 break;
2885 case e1000_82576:
2886 shift = 3;
2887 num_rx_queues = 2;
2888 break;
2889 case e1000_82575:
2890 shift = 2;
2891 shift2 = 6;
2892 default:
2893 break;
2895 } else {
2896 if (hw->mac.type == e1000_82575)
2897 shift = 6;
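/* Program the 128-entry RSS redirection table (RETA), four entries per
 * 32-bit register write; each byte selects the Rx queue for one hash
 * bucket, shifted as required by the MAC type / VMDq layout chosen
 * above.
 */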
2900 for (j = 0; j < (32 * 4); j++) {
2901 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2902 if (shift2)
2903 reta.bytes[j & 3] |= num_rx_queues << shift2;
2904 if ((j & 3) == 3)
2905 wr32(E1000_RETA(j >> 2), reta.dword);
2909 * Disable raw packet checksumming so that RSS hash is placed in
2910 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2911 * offloads as they are enabled by default
2913 rxcsum = rd32(E1000_RXCSUM);
2914 rxcsum |= E1000_RXCSUM_PCSD;
2916 if (adapter->hw.mac.type >= e1000_82576)
2917 /* Enable Receive Checksum Offload for SCTP */
2918 rxcsum |= E1000_RXCSUM_CRCOFL;
2920 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2921 wr32(E1000_RXCSUM, rxcsum);
2923 /* If VMDq is enabled then we set the appropriate mode for that, else
2924 * we default to RSS so that an RSS hash is calculated per packet even
2925 * if we are only using one queue */
2926 if (adapter->vfs_allocated_count) {
2927 if (hw->mac.type > e1000_82575) {
2928 /* Set the default pool for the PF's first queue */
2929 u32 vtctl = rd32(E1000_VT_CTL);
2930 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2931 E1000_VT_CTL_DISABLE_DEF_POOL);
2932 vtctl |= adapter->vfs_allocated_count <<
2933 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2934 wr32(E1000_VT_CTL, vtctl);
2936 if (adapter->rss_queues > 1)
2937 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2938 else
2939 mrqc = E1000_MRQC_ENABLE_VMDQ;
2940 } else {
2941 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2943 igb_vmm_control(adapter);
2946 * Generate RSS hash based on TCP port numbers and/or
2947 * IPv4/v6 src and dst addresses since UDP cannot be
2948 * hashed reliably due to IP fragmentation
2950 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2951 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2952 E1000_MRQC_RSS_FIELD_IPV6 |
2953 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2954 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2956 wr32(E1000_MRQC, mrqc);
2960 * igb_setup_rctl - configure the receive control registers
2961 * @adapter: Board private structure
2963 void igb_setup_rctl(struct igb_adapter *adapter)
2965 struct e1000_hw *hw = &adapter->hw;
2966 u32 rctl;
2968 rctl = rd32(E1000_RCTL);
2970 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2971 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2973 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2974 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2977 * enable stripping of CRC. It's unlikely this will break BMC
2978 * redirection as it did with e1000. Newer features require
2979 * that the HW strips the CRC.
2981 rctl |= E1000_RCTL_SECRC;
2983 /* disable store bad packets and clear size bits. */
2984 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2986 /* enable LPE to prevent packets larger than max_frame_size */
2987 rctl |= E1000_RCTL_LPE;
2989 /* disable queue 0 to prevent tail write w/o re-config */
2990 wr32(E1000_RXDCTL(0), 0);
2992 /* Attention!!! For SR-IOV PF driver operations you must enable
2993 * queue drop for all VF and PF queues to prevent head of line blocking
2994 * if an un-trusted VF does not provide descriptors to hardware.
2996 if (adapter->vfs_allocated_count) {
2997 /* set all queue drop enable bits */
2998 wr32(E1000_QDE, ALL_QUEUES);
3001 wr32(E1000_RCTL, rctl);
3004 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3005 int vfn)
3007 struct e1000_hw *hw = &adapter->hw;
3008 u32 vmolr;
3010 /* if this isn't the PF, check whether that VF has VLANs enabled and,
3011 * if so, increase the size to make room for a VLAN tag */
3012 if (vfn < adapter->vfs_allocated_count &&
3013 adapter->vf_data[vfn].vlans_enabled)
3014 size += VLAN_TAG_SIZE;
3016 vmolr = rd32(E1000_VMOLR(vfn));
3017 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3018 vmolr |= size | E1000_VMOLR_LPE;
3019 wr32(E1000_VMOLR(vfn), vmolr);
3021 return 0;
3025 * igb_rlpml_set - set maximum receive packet size
3026 * @adapter: board private structure
3028 * Configure maximum receivable packet size.
3030 static void igb_rlpml_set(struct igb_adapter *adapter)
3032 u32 max_frame_size = adapter->max_frame_size;
3033 struct e1000_hw *hw = &adapter->hw;
3034 u16 pf_id = adapter->vfs_allocated_count;
3036 if (pf_id) {
3037 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3039 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3040 * to our max jumbo frame size, in case we need to enable
3041 * jumbo frames on one of the rings later.
3042 * This will not pass over-length frames into the default
3043 * queue because it's gated by the VMOLR.RLPML.
3045 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3048 wr32(E1000_RLPML, max_frame_size);
3051 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3052 int vfn, bool aupe)
3054 struct e1000_hw *hw = &adapter->hw;
3055 u32 vmolr;
3058 * This register exists only on 82576 and newer, so on older hardware
3059 * exit and do nothing
3061 if (hw->mac.type < e1000_82576)
3062 return;
3064 vmolr = rd32(E1000_VMOLR(vfn));
3065 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3066 if (aupe)
3067 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3068 else
3069 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3071 /* clear all bits that might not be set */
3072 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3074 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3075 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3077 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3078 * multicast packets
3080 if (vfn <= adapter->vfs_allocated_count)
3081 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3083 wr32(E1000_VMOLR(vfn), vmolr);
3087 * igb_configure_rx_ring - Configure a receive ring after Reset
3088 * @adapter: board private structure
3089 * @ring: receive ring to be configured
3091 * Configure the Rx unit of the MAC after a reset.
3093 void igb_configure_rx_ring(struct igb_adapter *adapter,
3094 struct igb_ring *ring)
3096 struct e1000_hw *hw = &adapter->hw;
3097 u64 rdba = ring->dma;
3098 int reg_idx = ring->reg_idx;
3099 u32 srrctl = 0, rxdctl = 0;
3101 /* disable the queue */
3102 wr32(E1000_RXDCTL(reg_idx), 0);
3104 /* Set DMA base address registers */
3105 wr32(E1000_RDBAL(reg_idx),
3106 rdba & 0x00000000ffffffffULL);
3107 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3108 wr32(E1000_RDLEN(reg_idx),
3109 ring->count * sizeof(union e1000_adv_rx_desc));
3111 /* initialize head and tail */
3112 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3113 wr32(E1000_RDH(reg_idx), 0);
3114 writel(0, ring->tail);
3116 /* set descriptor configuration */
3117 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3118 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3119 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3120 #else
3121 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3122 #endif
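/* srrctl now requests IGB_RX_HDR_LEN bytes for the header buffer and
 * half a page (capped at 16KB) for the packet buffer; the descriptor
 * type below selects always-on header split.
 */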
3123 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3124 if (hw->mac.type >= e1000_82580)
3125 srrctl |= E1000_SRRCTL_TIMESTAMP;
3126 /* Only set Drop Enable if we are supporting multiple queues */
3127 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3128 srrctl |= E1000_SRRCTL_DROP_EN;
3130 wr32(E1000_SRRCTL(reg_idx), srrctl);
3132 /* set filtering for VMDQ pools */
3133 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3135 rxdctl |= IGB_RX_PTHRESH;
3136 rxdctl |= IGB_RX_HTHRESH << 8;
3137 rxdctl |= IGB_RX_WTHRESH << 16;
3139 /* enable receive descriptor fetching */
3140 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3141 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3145 * igb_configure_rx - Configure receive Unit after Reset
3146 * @adapter: board private structure
3148 * Configure the Rx unit of the MAC after a reset.
3150 static void igb_configure_rx(struct igb_adapter *adapter)
3152 int i;
3154 /* set UTA to appropriate mode */
3155 igb_set_uta(adapter);
3157 /* set the correct pool for the PF default MAC address in entry 0 */
3158 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3159 adapter->vfs_allocated_count);
3161 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3162 * the Base and Length of the Rx Descriptor Ring */
3163 for (i = 0; i < adapter->num_rx_queues; i++)
3164 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3168 * igb_free_tx_resources - Free Tx Resources per Queue
3169 * @tx_ring: Tx descriptor ring for a specific queue
3171 * Free all transmit software resources
3173 void igb_free_tx_resources(struct igb_ring *tx_ring)
3175 igb_clean_tx_ring(tx_ring);
3177 vfree(tx_ring->tx_buffer_info);
3178 tx_ring->tx_buffer_info = NULL;
3180 /* if not set, then don't free */
3181 if (!tx_ring->desc)
3182 return;
3184 dma_free_coherent(tx_ring->dev, tx_ring->size,
3185 tx_ring->desc, tx_ring->dma);
3187 tx_ring->desc = NULL;
3191 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3192 * @adapter: board private structure
3194 * Free all transmit software resources
3196 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3198 int i;
3200 for (i = 0; i < adapter->num_tx_queues; i++)
3201 igb_free_tx_resources(adapter->tx_ring[i]);
3204 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3205 struct igb_tx_buffer *tx_buffer)
3207 if (tx_buffer->skb) {
3208 dev_kfree_skb_any(tx_buffer->skb);
3209 if (tx_buffer->dma)
3210 dma_unmap_single(ring->dev,
3211 tx_buffer->dma,
3212 tx_buffer->length,
3213 DMA_TO_DEVICE);
3214 } else if (tx_buffer->dma) {
3215 dma_unmap_page(ring->dev,
3216 tx_buffer->dma,
3217 tx_buffer->length,
3218 DMA_TO_DEVICE);
3220 tx_buffer->next_to_watch = NULL;
3221 tx_buffer->skb = NULL;
3222 tx_buffer->dma = 0;
3223 /* buffer_info must be completely set up in the transmit path */
3227 * igb_clean_tx_ring - Free Tx Buffers
3228 * @tx_ring: ring to be cleaned
3230 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3232 struct igb_tx_buffer *buffer_info;
3233 unsigned long size;
3234 u16 i;
3236 if (!tx_ring->tx_buffer_info)
3237 return;
3238 /* Free all the Tx ring sk_buffs */
3240 for (i = 0; i < tx_ring->count; i++) {
3241 buffer_info = &tx_ring->tx_buffer_info[i];
3242 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3244 netdev_tx_reset_queue(txring_txq(tx_ring));
3246 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3247 memset(tx_ring->tx_buffer_info, 0, size);
3249 /* Zero out the descriptor ring */
3250 memset(tx_ring->desc, 0, tx_ring->size);
3252 tx_ring->next_to_use = 0;
3253 tx_ring->next_to_clean = 0;
3257 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3258 * @adapter: board private structure
3260 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3262 int i;
3264 for (i = 0; i < adapter->num_tx_queues; i++)
3265 igb_clean_tx_ring(adapter->tx_ring[i]);
3269 * igb_free_rx_resources - Free Rx Resources
3270 * @rx_ring: ring to clean the resources from
3272 * Free all receive software resources
3274 void igb_free_rx_resources(struct igb_ring *rx_ring)
3276 igb_clean_rx_ring(rx_ring);
3278 vfree(rx_ring->rx_buffer_info);
3279 rx_ring->rx_buffer_info = NULL;
3281 /* if not set, then don't free */
3282 if (!rx_ring->desc)
3283 return;
3285 dma_free_coherent(rx_ring->dev, rx_ring->size,
3286 rx_ring->desc, rx_ring->dma);
3288 rx_ring->desc = NULL;
3292 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3293 * @adapter: board private structure
3295 * Free all receive software resources
3297 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3299 int i;
3301 for (i = 0; i < adapter->num_rx_queues; i++)
3302 igb_free_rx_resources(adapter->rx_ring[i]);
3306 * igb_clean_rx_ring - Free Rx Buffers per Queue
3307 * @rx_ring: ring to free buffers from
3309 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3311 unsigned long size;
3312 u16 i;
3314 if (!rx_ring->rx_buffer_info)
3315 return;
3317 /* Free all the Rx ring sk_buffs */
3318 for (i = 0; i < rx_ring->count; i++) {
3319 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3320 if (buffer_info->dma) {
3321 dma_unmap_single(rx_ring->dev,
3322 buffer_info->dma,
3323 IGB_RX_HDR_LEN,
3324 DMA_FROM_DEVICE);
3325 buffer_info->dma = 0;
3328 if (buffer_info->skb) {
3329 dev_kfree_skb(buffer_info->skb);
3330 buffer_info->skb = NULL;
3332 if (buffer_info->page_dma) {
3333 dma_unmap_page(rx_ring->dev,
3334 buffer_info->page_dma,
3335 PAGE_SIZE / 2,
3336 DMA_FROM_DEVICE);
3337 buffer_info->page_dma = 0;
3339 if (buffer_info->page) {
3340 put_page(buffer_info->page);
3341 buffer_info->page = NULL;
3342 buffer_info->page_offset = 0;
3346 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3347 memset(rx_ring->rx_buffer_info, 0, size);
3349 /* Zero out the descriptor ring */
3350 memset(rx_ring->desc, 0, rx_ring->size);
3352 rx_ring->next_to_clean = 0;
3353 rx_ring->next_to_use = 0;
3357 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3358 * @adapter: board private structure
3360 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3362 int i;
3364 for (i = 0; i < adapter->num_rx_queues; i++)
3365 igb_clean_rx_ring(adapter->rx_ring[i]);
3369 * igb_set_mac - Change the Ethernet Address of the NIC
3370 * @netdev: network interface device structure
3371 * @p: pointer to an address structure
3373 * Returns 0 on success, negative on failure
3375 static int igb_set_mac(struct net_device *netdev, void *p)
3377 struct igb_adapter *adapter = netdev_priv(netdev);
3378 struct e1000_hw *hw = &adapter->hw;
3379 struct sockaddr *addr = p;
3381 if (!is_valid_ether_addr(addr->sa_data))
3382 return -EADDRNOTAVAIL;
3384 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3385 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3387 /* set the correct pool for the new PF MAC address in entry 0 */
3388 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3389 adapter->vfs_allocated_count);
3391 return 0;
3395 * igb_write_mc_addr_list - write multicast addresses to MTA
3396 * @netdev: network interface device structure
3398 * Writes multicast address list to the MTA hash table.
3399 * Returns: -ENOMEM on failure
3400 * 0 on no addresses written
3401 * X on writing X addresses to MTA
3403 static int igb_write_mc_addr_list(struct net_device *netdev)
3405 struct igb_adapter *adapter = netdev_priv(netdev);
3406 struct e1000_hw *hw = &adapter->hw;
3407 struct netdev_hw_addr *ha;
3408 u8 *mta_list;
3409 int i;
3411 if (netdev_mc_empty(netdev)) {
3412 /* nothing to program, so clear mc list */
3413 igb_update_mc_addr_list(hw, NULL, 0);
3414 igb_restore_vf_multicasts(adapter);
3415 return 0;
3418 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3419 if (!mta_list)
3420 return -ENOMEM;
3422 /* The shared function expects a packed array of only addresses. */
3423 i = 0;
3424 netdev_for_each_mc_addr(ha, netdev)
3425 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3427 igb_update_mc_addr_list(hw, mta_list, i);
3428 kfree(mta_list);
3430 return netdev_mc_count(netdev);
3434 * igb_write_uc_addr_list - write unicast addresses to RAR table
3435 * @netdev: network interface device structure
3437 * Writes unicast address list to the RAR table.
3438 * Returns: -ENOMEM on failure/insufficient address space
3439 * 0 on no addresses written
3440 * X on writing X addresses to the RAR table
3442 static int igb_write_uc_addr_list(struct net_device *netdev)
3444 struct igb_adapter *adapter = netdev_priv(netdev);
3445 struct e1000_hw *hw = &adapter->hw;
3446 unsigned int vfn = adapter->vfs_allocated_count;
3447 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3448 int count = 0;
3450 /* return ENOMEM indicating insufficient memory for addresses */
3451 if (netdev_uc_count(netdev) > rar_entries)
3452 return -ENOMEM;
3454 if (!netdev_uc_empty(netdev) && rar_entries) {
3455 struct netdev_hw_addr *ha;
3457 netdev_for_each_uc_addr(ha, netdev) {
3458 if (!rar_entries)
3459 break;
3460 igb_rar_set_qsel(adapter, ha->addr,
3461 rar_entries--,
3462 vfn);
3463 count++;
3466 /* write the addresses in reverse order to avoid write combining */
3467 for (; rar_entries > 0 ; rar_entries--) {
3468 wr32(E1000_RAH(rar_entries), 0);
3469 wr32(E1000_RAL(rar_entries), 0);
3471 wrfl();
3473 return count;
3477 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3478 * @netdev: network interface device structure
3480 * The set_rx_mode entry point is called whenever the unicast or multicast
3481 * address lists or the network interface flags are updated. This routine is
3482 * responsible for configuring the hardware for proper unicast, multicast,
3483 * promiscuous mode, and all-multi behavior.
3485 static void igb_set_rx_mode(struct net_device *netdev)
3487 struct igb_adapter *adapter = netdev_priv(netdev);
3488 struct e1000_hw *hw = &adapter->hw;
3489 unsigned int vfn = adapter->vfs_allocated_count;
3490 u32 rctl, vmolr = 0;
3491 int count;
3493 /* Check for Promiscuous and All Multicast modes */
3494 rctl = rd32(E1000_RCTL);
3496 /* clear the affected bits */
3497 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3499 if (netdev->flags & IFF_PROMISC) {
3500 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3501 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3502 } else {
3503 if (netdev->flags & IFF_ALLMULTI) {
3504 rctl |= E1000_RCTL_MPE;
3505 vmolr |= E1000_VMOLR_MPME;
3506 } else {
3508 * Write addresses to the MTA; if the attempt fails,
3509 * just turn on multicast promiscuous mode so
3510 * that we can at least receive multicast traffic
3512 count = igb_write_mc_addr_list(netdev);
3513 if (count < 0) {
3514 rctl |= E1000_RCTL_MPE;
3515 vmolr |= E1000_VMOLR_MPME;
3516 } else if (count) {
3517 vmolr |= E1000_VMOLR_ROMPE;
3521 * Write addresses to the available RAR registers; if there is not
3522 * enough space to store all the addresses, then enable
3523 * unicast promiscuous mode
3525 count = igb_write_uc_addr_list(netdev);
3526 if (count < 0) {
3527 rctl |= E1000_RCTL_UPE;
3528 vmolr |= E1000_VMOLR_ROPE;
3530 rctl |= E1000_RCTL_VFE;
3532 wr32(E1000_RCTL, rctl);
3535 * In order to support SR-IOV and eventually VMDq it is necessary to set
3536 * the VMOLR to enable the appropriate modes. Without this workaround
3537 * we will have issues with VLAN tag stripping not being done for frames
3538 * that are only arriving because we are the default pool
3540 if (hw->mac.type < e1000_82576)
3541 return;
3543 vmolr |= rd32(E1000_VMOLR(vfn)) &
3544 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3545 wr32(E1000_VMOLR(vfn), vmolr);
3546 igb_restore_vf_multicasts(adapter);
3549 static void igb_check_wvbr(struct igb_adapter *adapter)
3551 struct e1000_hw *hw = &adapter->hw;
3552 u32 wvbr = 0;
3554 switch (hw->mac.type) {
3555 case e1000_82576:
3556 case e1000_i350:
3557 if (!(wvbr = rd32(E1000_WVBR)))
3558 return;
3559 break;
3560 default:
3561 break;
3564 adapter->wvbr |= wvbr;
3567 #define IGB_STAGGERED_QUEUE_OFFSET 8
3569 static void igb_spoof_check(struct igb_adapter *adapter)
3571 int j;
3573 if (!adapter->wvbr)
3574 return;
3576 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3577 if (adapter->wvbr & (1 << j) ||
3578 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3579 dev_warn(&adapter->pdev->dev,
3580 "Spoof event(s) detected on VF %d\n", j);
3581 adapter->wvbr &=
3582 ~((1 << j) |
3583 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3588 /* Need to wait a few seconds after link up to get diagnostic information from
3589 * the phy */
3590 static void igb_update_phy_info(unsigned long data)
3592 struct igb_adapter *adapter = (struct igb_adapter *) data;
3593 igb_get_phy_info(&adapter->hw);
3597 * igb_has_link - check shared code for link and determine up/down
3598 * @adapter: pointer to driver private info
3600 bool igb_has_link(struct igb_adapter *adapter)
3602 struct e1000_hw *hw = &adapter->hw;
3603 bool link_active = false;
3604 s32 ret_val = 0;
3606 /* get_link_status is set on LSC (link status) interrupt or
3607 * rx sequence error interrupt. get_link_status will stay
3608 * false until the e1000_check_for_link establishes link
3609 * for copper adapters ONLY
3611 switch (hw->phy.media_type) {
3612 case e1000_media_type_copper:
3613 if (hw->mac.get_link_status) {
3614 ret_val = hw->mac.ops.check_for_link(hw);
3615 link_active = !hw->mac.get_link_status;
3616 } else {
3617 link_active = true;
3619 break;
3620 case e1000_media_type_internal_serdes:
3621 ret_val = hw->mac.ops.check_for_link(hw);
3622 link_active = hw->mac.serdes_has_link;
3623 break;
3624 default:
3625 case e1000_media_type_unknown:
3626 break;
3629 return link_active;
3632 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3634 bool ret = false;
3635 u32 ctrl_ext, thstat;
3637 /* check for thermal sensor event on i350, copper only */
3638 if (hw->mac.type == e1000_i350) {
3639 thstat = rd32(E1000_THSTAT);
3640 ctrl_ext = rd32(E1000_CTRL_EXT);
3642 if ((hw->phy.media_type == e1000_media_type_copper) &&
3643 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3644 ret = !!(thstat & event);
3648 return ret;
3652 * igb_watchdog - Timer Call-back
3653 * @data: pointer to adapter cast into an unsigned long
3655 static void igb_watchdog(unsigned long data)
3657 struct igb_adapter *adapter = (struct igb_adapter *)data;
3658 /* Do the rest outside of interrupt context */
3659 schedule_work(&adapter->watchdog_task);
3662 static void igb_watchdog_task(struct work_struct *work)
3664 struct igb_adapter *adapter = container_of(work,
3665 struct igb_adapter,
3666 watchdog_task);
3667 struct e1000_hw *hw = &adapter->hw;
3668 struct net_device *netdev = adapter->netdev;
3669 u32 link;
3670 int i;
3672 link = igb_has_link(adapter);
3673 if (link) {
3674 /* Cancel scheduled suspend requests. */
3675 pm_runtime_resume(netdev->dev.parent);
3677 if (!netif_carrier_ok(netdev)) {
3678 u32 ctrl;
3679 hw->mac.ops.get_speed_and_duplex(hw,
3680 &adapter->link_speed,
3681 &adapter->link_duplex);
3683 ctrl = rd32(E1000_CTRL);
3684 /* Links status message must follow this format */
3685 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3686 "Duplex, Flow Control: %s\n",
3687 netdev->name,
3688 adapter->link_speed,
3689 adapter->link_duplex == FULL_DUPLEX ?
3690 "Full" : "Half",
3691 (ctrl & E1000_CTRL_TFCE) &&
3692 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3693 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3694 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3696 /* check for thermal sensor event */
3697 if (igb_thermal_sensor_event(hw,
3698 E1000_THSTAT_LINK_THROTTLE)) {
3699 netdev_info(netdev, "The network adapter link "
3700 "speed was downshifted because it "
3701 "overheated\n");
3704 /* adjust timeout factor according to speed/duplex */
3705 adapter->tx_timeout_factor = 1;
3706 switch (adapter->link_speed) {
3707 case SPEED_10:
3708 adapter->tx_timeout_factor = 14;
3709 break;
3710 case SPEED_100:
3711 /* maybe add some timeout factor ? */
3712 break;
3715 netif_carrier_on(netdev);
3717 igb_ping_all_vfs(adapter);
3718 igb_check_vf_rate_limit(adapter);
3720 /* link state has changed, schedule phy info update */
3721 if (!test_bit(__IGB_DOWN, &adapter->state))
3722 mod_timer(&adapter->phy_info_timer,
3723 round_jiffies(jiffies + 2 * HZ));
3725 } else {
3726 if (netif_carrier_ok(netdev)) {
3727 adapter->link_speed = 0;
3728 adapter->link_duplex = 0;
3730 /* check for thermal sensor event */
3731 if (igb_thermal_sensor_event(hw,
3732 E1000_THSTAT_PWR_DOWN)) {
3733 netdev_err(netdev, "The network adapter was "
3734 "stopped because it overheated\n");
3737 /* Links status message must follow this format */
3738 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3739 netdev->name);
3740 netif_carrier_off(netdev);
3742 igb_ping_all_vfs(adapter);
3744 /* link state has changed, schedule phy info update */
3745 if (!test_bit(__IGB_DOWN, &adapter->state))
3746 mod_timer(&adapter->phy_info_timer,
3747 round_jiffies(jiffies + 2 * HZ));
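/* No link: ask the PM core to runtime-suspend the device after a five
 * second grace period; the pm_runtime_resume() call above cancels this
 * as soon as link comes back.
 */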
3749 pm_schedule_suspend(netdev->dev.parent,
3750 MSEC_PER_SEC * 5);
3754 spin_lock(&adapter->stats64_lock);
3755 igb_update_stats(adapter, &adapter->stats64);
3756 spin_unlock(&adapter->stats64_lock);
3758 for (i = 0; i < adapter->num_tx_queues; i++) {
3759 struct igb_ring *tx_ring = adapter->tx_ring[i];
3760 if (!netif_carrier_ok(netdev)) {
3761 /* We've lost link, so the controller stops DMA,
3762 * but we've got queued Tx work that's never going
3763 * to get done, so reset controller to flush Tx.
3764 * (Do the reset outside of interrupt context). */
3765 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3766 adapter->tx_timeout_count++;
3767 schedule_work(&adapter->reset_task);
3768 /* return immediately since reset is imminent */
3769 return;
3773 /* Force detection of hung controller every watchdog period */
3774 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3777 /* Cause software interrupt to ensure rx ring is cleaned */
3778 if (adapter->msix_entries) {
3779 u32 eics = 0;
3780 for (i = 0; i < adapter->num_q_vectors; i++)
3781 eics |= adapter->q_vector[i]->eims_value;
3782 wr32(E1000_EICS, eics);
3783 } else {
3784 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3787 igb_spoof_check(adapter);
3789 /* Reset the timer */
3790 if (!test_bit(__IGB_DOWN, &adapter->state))
3791 mod_timer(&adapter->watchdog_timer,
3792 round_jiffies(jiffies + 2 * HZ));
3795 enum latency_range {
3796 lowest_latency = 0,
3797 low_latency = 1,
3798 bulk_latency = 2,
3799 latency_invalid = 255
3803 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3805 * Stores a new ITR value based strictly on packet size. This
3806 * algorithm is less sophisticated than that used in igb_update_itr,
3807 * due to the difficulty of synchronizing statistics across multiple
3808 * receive rings. The divisors and thresholds used by this function
3809 * were determined based on theoretical maximum wire speed and testing
3810 * data, in order to minimize response time while increasing bulk
3811 * throughput.
3812 * This functionality is controlled by the InterruptThrottleRate module
3813 * parameter (see igb_param.c)
3814 * NOTE: This function is called only when operating in a multiqueue
3815 * receive environment.
3816 * @q_vector: pointer to q_vector
3818 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3820 int new_val = q_vector->itr_val;
3821 int avg_wire_size = 0;
3822 struct igb_adapter *adapter = q_vector->adapter;
3823 unsigned int packets;
3825 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3826 * ints/sec - ITR timer value of 120 ticks.
3828 if (adapter->link_speed != SPEED_1000) {
3829 new_val = IGB_4K_ITR;
3830 goto set_itr_val;
3833 packets = q_vector->rx.total_packets;
3834 if (packets)
3835 avg_wire_size = q_vector->rx.total_bytes / packets;
3837 packets = q_vector->tx.total_packets;
3838 if (packets)
3839 avg_wire_size = max_t(u32, avg_wire_size,
3840 q_vector->tx.total_bytes / packets);
3842 /* if avg_wire_size isn't set no work was done */
3843 if (!avg_wire_size)
3844 goto clear_counts;
3846 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3847 avg_wire_size += 24;
3849 /* Don't starve jumbo frames */
3850 avg_wire_size = min(avg_wire_size, 3000);
3852 /* Give a little boost to mid-size frames */
3853 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3854 new_val = avg_wire_size / 3;
3855 else
3856 new_val = avg_wire_size / 2;
3858 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3859 if (new_val < IGB_20K_ITR &&
3860 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3861 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3862 new_val = IGB_20K_ITR;
3864 set_itr_val:
3865 if (new_val != q_vector->itr_val) {
3866 q_vector->itr_val = new_val;
3867 q_vector->set_itr = 1;
3869 clear_counts:
3870 q_vector->rx.total_bytes = 0;
3871 q_vector->rx.total_packets = 0;
3872 q_vector->tx.total_bytes = 0;
3873 q_vector->tx.total_packets = 0;
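/* Worked example (illustrative only, not part of the driver): with the
 * arithmetic above, a stream of full-size 1514-byte frames gives
 * avg_wire_size = 1514 + 24 = 1538, which falls outside the 300..1200
 * mid-size window, so new_val = 1538 / 2 = 769.  A stream of 600-byte
 * frames gives avg_wire_size = 624 and new_val = 624 / 3 = 208, i.e. a
 * higher interrupt rate for latency-sensitive mid-size traffic.  Jumbo
 * traffic is clamped at avg_wire_size = 3000, so new_val never exceeds
 * 1500 on this path.
 */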
3877 * igb_update_itr - update the dynamic ITR value based on statistics
3878 * Stores a new ITR value based on packets and byte
3879 * counts during the last interrupt. The advantage of per interrupt
3880 * computation is faster updates and more accurate ITR for the current
3881 * traffic pattern. Constants in this function were computed
3882 * based on theoretical maximum wire speed and thresholds were set based
3883 * on testing data as well as attempting to minimize response time
3884 * while increasing bulk throughput.
3885 * This functionality is controlled by the InterruptThrottleRate module
3886 * parameter (see igb_param.c)
3887 * NOTE: These calculations are only valid when operating in a single-
3888 * queue environment.
3889 * @q_vector: pointer to q_vector
3890 * @ring_container: ring info to update the itr for
3892 static void igb_update_itr(struct igb_q_vector *q_vector,
3893 struct igb_ring_container *ring_container)
3895 unsigned int packets = ring_container->total_packets;
3896 unsigned int bytes = ring_container->total_bytes;
3897 u8 itrval = ring_container->itr;
3899 /* no packets, exit with status unchanged */
3900 if (packets == 0)
3901 return;
3903 switch (itrval) {
3904 case lowest_latency:
3905 /* handle TSO and jumbo frames */
3906 if (bytes/packets > 8000)
3907 itrval = bulk_latency;
3908 else if ((packets < 5) && (bytes > 512))
3909 itrval = low_latency;
3910 break;
3911 case low_latency: /* 50 usec aka 20000 ints/s */
3912 if (bytes > 10000) {
3913 /* this if handles the TSO accounting */
3914 if (bytes/packets > 8000) {
3915 itrval = bulk_latency;
3916 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3917 itrval = bulk_latency;
3918 } else if ((packets > 35)) {
3919 itrval = lowest_latency;
3921 } else if (bytes/packets > 2000) {
3922 itrval = bulk_latency;
3923 } else if (packets <= 2 && bytes < 512) {
3924 itrval = lowest_latency;
3926 break;
3927 case bulk_latency: /* 250 usec aka 4000 ints/s */
3928 if (bytes > 25000) {
3929 if (packets > 35)
3930 itrval = low_latency;
3931 } else if (bytes < 1500) {
3932 itrval = low_latency;
3934 break;
3937 /* clear work counters since we have the values we need */
3938 ring_container->total_bytes = 0;
3939 ring_container->total_packets = 0;
3941 /* write updated itr to ring container */
3942 ring_container->itr = itrval;
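/* Worked example (illustrative only, not part of the driver): starting
 * from lowest_latency, an interrupt that cleaned 3 packets totalling
 * 30000 bytes has bytes/packets = 10000 > 8000 (typical of TSO), so the
 * ring container moves straight to bulk_latency.  From bulk_latency, an
 * interrupt with 40 packets and 60000 bytes (> 25000 bytes and > 35
 * packets) steps back to low_latency; note the state machine never jumps
 * from bulk_latency directly to lowest_latency.
 */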
3945 static void igb_set_itr(struct igb_q_vector *q_vector)
3947 struct igb_adapter *adapter = q_vector->adapter;
3948 u32 new_itr = q_vector->itr_val;
3949 u8 current_itr = 0;
3951 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3952 if (adapter->link_speed != SPEED_1000) {
3953 current_itr = 0;
3954 new_itr = IGB_4K_ITR;
3955 goto set_itr_now;
3958 igb_update_itr(q_vector, &q_vector->tx);
3959 igb_update_itr(q_vector, &q_vector->rx);
3961 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3963 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3964 if (current_itr == lowest_latency &&
3965 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3966 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3967 current_itr = low_latency;
3969 switch (current_itr) {
3970 /* counts and packets in update_itr are dependent on these numbers */
3971 case lowest_latency:
3972 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3973 break;
3974 case low_latency:
3975 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3976 break;
3977 case bulk_latency:
3978 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3979 break;
3980 default:
3981 break;
3984 set_itr_now:
3985 if (new_itr != q_vector->itr_val) {
3986 /* this attempts to bias the interrupt rate towards Bulk
3987 * by adding intermediate steps when interrupt rate is
3988 * increasing */
3989 new_itr = new_itr > q_vector->itr_val ?
3990 max((new_itr * q_vector->itr_val) /
3991 (new_itr + (q_vector->itr_val >> 2)),
3992 new_itr) :
3993 new_itr;
3994 /* Don't write the value here; it resets the adapter's
3995 * internal timer, and causes us to delay far longer than
3996 * we should between interrupts. Instead, we write the ITR
3997 * value at the beginning of the next interrupt so the timing
3998 * ends up being correct.
4000 q_vector->itr_val = new_itr;
4001 q_vector->set_itr = 1;
4005 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4006 u32 type_tucmd, u32 mss_l4len_idx)
4008 struct e1000_adv_tx_context_desc *context_desc;
4009 u16 i = tx_ring->next_to_use;
4011 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4013 i++;
4014 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4016 /* set bits to identify this as an advanced context descriptor */
4017 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4019 /* For 82575, context index must be unique per ring. */
4020 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4021 mss_l4len_idx |= tx_ring->reg_idx << 4;
4023 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4024 context_desc->seqnum_seed = 0;
4025 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4026 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4029 static int igb_tso(struct igb_ring *tx_ring,
4030 struct igb_tx_buffer *first,
4031 u8 *hdr_len)
4033 struct sk_buff *skb = first->skb;
4034 u32 vlan_macip_lens, type_tucmd;
4035 u32 mss_l4len_idx, l4len;
4037 if (!skb_is_gso(skb))
4038 return 0;
4040 if (skb_header_cloned(skb)) {
4041 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4042 if (err)
4043 return err;
4046 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4047 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4049 if (first->protocol == __constant_htons(ETH_P_IP)) {
4050 struct iphdr *iph = ip_hdr(skb);
4051 iph->tot_len = 0;
4052 iph->check = 0;
4053 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4054 iph->daddr, 0,
4055 IPPROTO_TCP,
4056 0);
4057 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4058 first->tx_flags |= IGB_TX_FLAGS_TSO |
4059 IGB_TX_FLAGS_CSUM |
4060 IGB_TX_FLAGS_IPV4;
4061 } else if (skb_is_gso_v6(skb)) {
4062 ipv6_hdr(skb)->payload_len = 0;
4063 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4064 &ipv6_hdr(skb)->daddr,
4065 0, IPPROTO_TCP, 0);
4066 first->tx_flags |= IGB_TX_FLAGS_TSO |
4067 IGB_TX_FLAGS_CSUM;
4070 /* compute header lengths */
4071 l4len = tcp_hdrlen(skb);
4072 *hdr_len = skb_transport_offset(skb) + l4len;
4074 /* update gso size and bytecount with header size */
4075 first->gso_segs = skb_shinfo(skb)->gso_segs;
4076 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4078 /* MSS L4LEN IDX */
4079 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4080 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4082 /* VLAN MACLEN IPLEN */
4083 vlan_macip_lens = skb_network_header_len(skb);
4084 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4085 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4087 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4089 return 1;
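/* Worked example (illustrative only, not part of the driver): for a
 * plain TCP/IPv4 TSO frame with a 14-byte Ethernet header, 20-byte IP
 * header, 20-byte TCP header and an MSS of 1448, the code above yields
 * l4len = 20 and *hdr_len = 34 + 20 = 54, and packs
 * mss_l4len_idx = (20 << E1000_ADVTXD_L4LEN_SHIFT) |
 *                 (1448 << E1000_ADVTXD_MSS_SHIFT) and
 * vlan_macip_lens = 20 | (14 << E1000_ADVTXD_MACLEN_SHIFT), with a
 * single context descriptor written for the whole TSO send.
 */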
4092 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4094 struct sk_buff *skb = first->skb;
4095 u32 vlan_macip_lens = 0;
4096 u32 mss_l4len_idx = 0;
4097 u32 type_tucmd = 0;
4099 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4100 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4101 return;
4102 } else {
4103 u8 l4_hdr = 0;
4104 switch (first->protocol) {
4105 case __constant_htons(ETH_P_IP):
4106 vlan_macip_lens |= skb_network_header_len(skb);
4107 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4108 l4_hdr = ip_hdr(skb)->protocol;
4109 break;
4110 case __constant_htons(ETH_P_IPV6):
4111 vlan_macip_lens |= skb_network_header_len(skb);
4112 l4_hdr = ipv6_hdr(skb)->nexthdr;
4113 break;
4114 default:
4115 if (unlikely(net_ratelimit())) {
4116 dev_warn(tx_ring->dev,
4117 "partial checksum but proto=%x!\n",
4118 first->protocol);
4120 break;
4123 switch (l4_hdr) {
4124 case IPPROTO_TCP:
4125 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4126 mss_l4len_idx = tcp_hdrlen(skb) <<
4127 E1000_ADVTXD_L4LEN_SHIFT;
4128 break;
4129 case IPPROTO_SCTP:
4130 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4131 mss_l4len_idx = sizeof(struct sctphdr) <<
4132 E1000_ADVTXD_L4LEN_SHIFT;
4133 break;
4134 case IPPROTO_UDP:
4135 mss_l4len_idx = sizeof(struct udphdr) <<
4136 E1000_ADVTXD_L4LEN_SHIFT;
4137 break;
4138 default:
4139 if (unlikely(net_ratelimit())) {
4140 dev_warn(tx_ring->dev,
4141 "partial checksum but l4 proto=%x!\n",
4142 l4_hdr);
4144 break;
4147 /* update TX checksum flag */
4148 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4151 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4152 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4154 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4157 static __le32 igb_tx_cmd_type(u32 tx_flags)
4159 /* set type for advanced descriptor with frame checksum insertion */
4160 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4161 E1000_ADVTXD_DCMD_IFCS |
4162 E1000_ADVTXD_DCMD_DEXT);
4164 /* set HW vlan bit if vlan is present */
4165 if (tx_flags & IGB_TX_FLAGS_VLAN)
4166 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4168 /* set timestamp bit if present */
4169 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4170 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4172 /* set segmentation bits for TSO */
4173 if (tx_flags & IGB_TX_FLAGS_TSO)
4174 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4176 return cmd_type;
4179 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4180 union e1000_adv_tx_desc *tx_desc,
4181 u32 tx_flags, unsigned int paylen)
4183 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4185 /* 82575 requires a unique index per ring if any offload is enabled */
4186 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4187 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4188 olinfo_status |= tx_ring->reg_idx << 4;
4190 /* insert L4 checksum */
4191 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4192 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4194 /* insert IPv4 checksum */
4195 if (tx_flags & IGB_TX_FLAGS_IPV4)
4196 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4199 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4203 * The largest size we can write to the descriptor is 65535. In order to
4204 * maintain a power of two alignment we have to limit ourselves to 32K.
4206 #define IGB_MAX_TXD_PWR 15
4207 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
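/* Illustrative note (not part of the driver): with the 32K cap above, a
 * 45000-byte fragment from a large send is carved into a 32768-byte
 * data descriptor by the inner loop of igb_tx_map() below, with the
 * remaining 12232 bytes going into the next data descriptor, while any
 * fragment of 32768 bytes or less maps 1:1 onto a single descriptor.
 */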
4209 static void igb_tx_map(struct igb_ring *tx_ring,
4210 struct igb_tx_buffer *first,
4211 const u8 hdr_len)
4213 struct sk_buff *skb = first->skb;
4214 struct igb_tx_buffer *tx_buffer_info;
4215 union e1000_adv_tx_desc *tx_desc;
4216 dma_addr_t dma;
4217 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4218 unsigned int data_len = skb->data_len;
4219 unsigned int size = skb_headlen(skb);
4220 unsigned int paylen = skb->len - hdr_len;
4221 __le32 cmd_type;
4222 u32 tx_flags = first->tx_flags;
4223 u16 i = tx_ring->next_to_use;
4225 tx_desc = IGB_TX_DESC(tx_ring, i);
4227 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4228 cmd_type = igb_tx_cmd_type(tx_flags);
4230 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4231 if (dma_mapping_error(tx_ring->dev, dma))
4232 goto dma_error;
4234 /* record length, and DMA address */
4235 first->length = size;
4236 first->dma = dma;
4237 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4239 for (;;) {
4240 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4241 tx_desc->read.cmd_type_len =
4242 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4244 i++;
4245 tx_desc++;
4246 if (i == tx_ring->count) {
4247 tx_desc = IGB_TX_DESC(tx_ring, 0);
4248 i = 0;
4251 dma += IGB_MAX_DATA_PER_TXD;
4252 size -= IGB_MAX_DATA_PER_TXD;
4254 tx_desc->read.olinfo_status = 0;
4255 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4258 if (likely(!data_len))
4259 break;
4261 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4263 i++;
4264 tx_desc++;
4265 if (i == tx_ring->count) {
4266 tx_desc = IGB_TX_DESC(tx_ring, 0);
4267 i = 0;
4270 size = skb_frag_size(frag);
4271 data_len -= size;
4273 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4274 size, DMA_TO_DEVICE);
4275 if (dma_mapping_error(tx_ring->dev, dma))
4276 goto dma_error;
4278 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4279 tx_buffer_info->length = size;
4280 tx_buffer_info->dma = dma;
4282 tx_desc->read.olinfo_status = 0;
4283 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4285 frag++;
4288 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4290 /* write last descriptor with RS and EOP bits */
4291 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4292 tx_desc->read.cmd_type_len = cmd_type;
4294 /* set the timestamp */
4295 first->time_stamp = jiffies;
4298 * Force memory writes to complete before letting h/w know there
4299 * are new descriptors to fetch. (Only applicable for weak-ordered
4300 * memory model archs, such as IA-64).
4302 * We also need this memory barrier to make certain all of the
4303 * status bits have been updated before next_to_watch is written.
4305 wmb();
4307 /* set next_to_watch value indicating a packet is present */
4308 first->next_to_watch = tx_desc;
4310 i++;
4311 if (i == tx_ring->count)
4312 i = 0;
4314 tx_ring->next_to_use = i;
4316 writel(i, tx_ring->tail);
4318 /* we need this if more than one processor can write to our tail
4319 * at a time, it synchronizes IO on IA64/Altix systems */
4320 mmiowb();
4322 return;
4324 dma_error:
4325 dev_err(tx_ring->dev, "TX DMA map failed\n");
4327 /* clear dma mappings for failed tx_buffer_info map */
4328 for (;;) {
4329 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4330 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4331 if (tx_buffer_info == first)
4332 break;
4333 if (i == 0)
4334 i = tx_ring->count;
4335 i--;
4338 tx_ring->next_to_use = i;
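/* Ordering note (illustrative, not part of the driver): in the success
 * path above, every descriptor write must be globally visible before
 * first->next_to_watch is set and before the tail register is bumped;
 * the wmb() provides that guarantee on weakly ordered machines, and
 * mmiowb() keeps the MMIO tail writes ordered when several CPUs can
 * transmit on the same ring (e.g. IA64/Altix).
 */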
4341 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4343 struct net_device *netdev = tx_ring->netdev;
4345 netif_stop_subqueue(netdev, tx_ring->queue_index);
4347 /* Herbert's original patch had:
4348 * smp_mb__after_netif_stop_queue();
4349 * but since that doesn't exist yet, just open code it. */
4350 smp_mb();
4352 /* We need to check again in case another CPU has just
4353 * made room available. */
4354 if (igb_desc_unused(tx_ring) < size)
4355 return -EBUSY;
4357 /* A reprieve! */
4358 netif_wake_subqueue(netdev, tx_ring->queue_index);
4360 u64_stats_update_begin(&tx_ring->tx_syncp2);
4361 tx_ring->tx_stats.restart_queue2++;
4362 u64_stats_update_end(&tx_ring->tx_syncp2);
4364 return 0;
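/* Ordering note (illustrative, not part of the driver): the stop/wake
 * protocol above pairs with igb_clean_tx_irq().  The transmit path
 * stops the subqueue, issues smp_mb(), then re-reads the free
 * descriptor count; the cleanup path advances next_to_clean, issues its
 * own smp_mb(), then re-checks whether the subqueue is stopped.  The
 * two barriers ensure at least one side observes the other's update, so
 * a queue is never left stopped while room is available.
 */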
4367 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4369 if (igb_desc_unused(tx_ring) >= size)
4370 return 0;
4371 return __igb_maybe_stop_tx(tx_ring, size);
4374 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4375 struct igb_ring *tx_ring)
4377 struct igb_tx_buffer *first;
4378 int tso;
4379 u32 tx_flags = 0;
4380 __be16 protocol = vlan_get_protocol(skb);
4381 u8 hdr_len = 0;
4383 /* need: 1 descriptor per page,
4384 * + 2 desc gap to keep tail from touching head,
4385 * + 1 desc for skb->data,
4386 * + 1 desc for context descriptor,
4387 * otherwise try next time */
4388 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4389 /* this is a hard error */
4390 return NETDEV_TX_BUSY;
4393 /* record the location of the first descriptor for this packet */
4394 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4395 first->skb = skb;
4396 first->bytecount = skb->len;
4397 first->gso_segs = 1;
4399 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4400 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4401 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4404 if (vlan_tx_tag_present(skb)) {
4405 tx_flags |= IGB_TX_FLAGS_VLAN;
4406 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4409 /* record initial flags and protocol */
4410 first->tx_flags = tx_flags;
4411 first->protocol = protocol;
4413 tso = igb_tso(tx_ring, first, &hdr_len);
4414 if (tso < 0)
4415 goto out_drop;
4416 else if (!tso)
4417 igb_tx_csum(tx_ring, first);
4419 igb_tx_map(tx_ring, first, hdr_len);
4421 /* Make sure there is space in the ring for the next send. */
4422 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4424 return NETDEV_TX_OK;
4426 out_drop:
4427 igb_unmap_and_free_tx_resource(tx_ring, first);
4429 return NETDEV_TX_OK;
4432 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4433 struct sk_buff *skb)
4435 unsigned int r_idx = skb->queue_mapping;
4437 if (r_idx >= adapter->num_tx_queues)
4438 r_idx = r_idx % adapter->num_tx_queues;
4440 return adapter->tx_ring[r_idx];
4443 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4444 struct net_device *netdev)
4446 struct igb_adapter *adapter = netdev_priv(netdev);
4448 if (test_bit(__IGB_DOWN, &adapter->state)) {
4449 dev_kfree_skb_any(skb);
4450 return NETDEV_TX_OK;
4453 if (skb->len <= 0) {
4454 dev_kfree_skb_any(skb);
4455 return NETDEV_TX_OK;
4459 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4460 * in order to meet this minimum size requirement.
4462 if (skb->len < 17) {
4463 if (skb_padto(skb, 17))
4464 return NETDEV_TX_OK;
4465 skb->len = 17;
4468 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4472 * igb_tx_timeout - Respond to a Tx Hang
4473 * @netdev: network interface device structure
4475 static void igb_tx_timeout(struct net_device *netdev)
4477 struct igb_adapter *adapter = netdev_priv(netdev);
4478 struct e1000_hw *hw = &adapter->hw;
4480 /* Do the reset outside of interrupt context */
4481 adapter->tx_timeout_count++;
4483 if (hw->mac.type >= e1000_82580)
4484 hw->dev_spec._82575.global_device_reset = true;
4486 schedule_work(&adapter->reset_task);
4487 wr32(E1000_EICS,
4488 (adapter->eims_enable_mask & ~adapter->eims_other));
4491 static void igb_reset_task(struct work_struct *work)
4493 struct igb_adapter *adapter;
4494 adapter = container_of(work, struct igb_adapter, reset_task);
4496 igb_dump(adapter);
4497 netdev_err(adapter->netdev, "Reset adapter\n");
4498 igb_reinit_locked(adapter);
4502 * igb_get_stats64 - Get System Network Statistics
4503 * @netdev: network interface device structure
4504 * @stats: rtnl_link_stats64 pointer
4507 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4508 struct rtnl_link_stats64 *stats)
4510 struct igb_adapter *adapter = netdev_priv(netdev);
4512 spin_lock(&adapter->stats64_lock);
4513 igb_update_stats(adapter, &adapter->stats64);
4514 memcpy(stats, &adapter->stats64, sizeof(*stats));
4515 spin_unlock(&adapter->stats64_lock);
4517 return stats;
4521 * igb_change_mtu - Change the Maximum Transfer Unit
4522 * @netdev: network interface device structure
4523 * @new_mtu: new value for maximum frame size
4525 * Returns 0 on success, negative on failure
4527 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4529 struct igb_adapter *adapter = netdev_priv(netdev);
4530 struct pci_dev *pdev = adapter->pdev;
4531 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4533 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4534 dev_err(&pdev->dev, "Invalid MTU setting\n");
4535 return -EINVAL;
4538 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4539 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4540 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4541 return -EINVAL;
4544 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4545 msleep(1);
4547 /* igb_down has a dependency on max_frame_size */
4548 adapter->max_frame_size = max_frame;
4550 if (netif_running(netdev))
4551 igb_down(adapter);
4553 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4554 netdev->mtu, new_mtu);
4555 netdev->mtu = new_mtu;
4557 if (netif_running(netdev))
4558 igb_up(adapter);
4559 else
4560 igb_reset(adapter);
4562 clear_bit(__IGB_RESETTING, &adapter->state);
4564 return 0;
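/* Worked example (illustrative, not part of the driver): for the
 * default MTU of 1500, max_frame = 1500 + ETH_HLEN (14) + ETH_FCS_LEN
 * (4) + VLAN_HLEN (4) = 1522 bytes, well below
 * MAX_STD_JUMBO_FRAME_SIZE.  Requesting an MTU of 9216 gives a
 * max_frame of 9238, the largest value the check above accepts.
 */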
4568 * igb_update_stats - Update the board statistics counters
4569 * @adapter: board private structure
4572 void igb_update_stats(struct igb_adapter *adapter,
4573 struct rtnl_link_stats64 *net_stats)
4575 struct e1000_hw *hw = &adapter->hw;
4576 struct pci_dev *pdev = adapter->pdev;
4577 u32 reg, mpc;
4578 u16 phy_tmp;
4579 int i;
4580 u64 bytes, packets;
4581 unsigned int start;
4582 u64 _bytes, _packets;
4584 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4587 * Prevent stats update while adapter is being reset, or if the pci
4588 * connection is down.
4590 if (adapter->link_speed == 0)
4591 return;
4592 if (pci_channel_offline(pdev))
4593 return;
4595 bytes = 0;
4596 packets = 0;
4597 for (i = 0; i < adapter->num_rx_queues; i++) {
4598 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4599 struct igb_ring *ring = adapter->rx_ring[i];
4601 ring->rx_stats.drops += rqdpc_tmp;
4602 net_stats->rx_fifo_errors += rqdpc_tmp;
4604 do {
4605 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4606 _bytes = ring->rx_stats.bytes;
4607 _packets = ring->rx_stats.packets;
4608 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4609 bytes += _bytes;
4610 packets += _packets;
4613 net_stats->rx_bytes = bytes;
4614 net_stats->rx_packets = packets;
4616 bytes = 0;
4617 packets = 0;
4618 for (i = 0; i < adapter->num_tx_queues; i++) {
4619 struct igb_ring *ring = adapter->tx_ring[i];
4620 do {
4621 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4622 _bytes = ring->tx_stats.bytes;
4623 _packets = ring->tx_stats.packets;
4624 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4625 bytes += _bytes;
4626 packets += _packets;
4628 net_stats->tx_bytes = bytes;
4629 net_stats->tx_packets = packets;
4631 /* read stats registers */
4632 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4633 adapter->stats.gprc += rd32(E1000_GPRC);
4634 adapter->stats.gorc += rd32(E1000_GORCL);
4635 rd32(E1000_GORCH); /* clear GORCL */
4636 adapter->stats.bprc += rd32(E1000_BPRC);
4637 adapter->stats.mprc += rd32(E1000_MPRC);
4638 adapter->stats.roc += rd32(E1000_ROC);
4640 adapter->stats.prc64 += rd32(E1000_PRC64);
4641 adapter->stats.prc127 += rd32(E1000_PRC127);
4642 adapter->stats.prc255 += rd32(E1000_PRC255);
4643 adapter->stats.prc511 += rd32(E1000_PRC511);
4644 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4645 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4646 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4647 adapter->stats.sec += rd32(E1000_SEC);
4649 mpc = rd32(E1000_MPC);
4650 adapter->stats.mpc += mpc;
4651 net_stats->rx_fifo_errors += mpc;
4652 adapter->stats.scc += rd32(E1000_SCC);
4653 adapter->stats.ecol += rd32(E1000_ECOL);
4654 adapter->stats.mcc += rd32(E1000_MCC);
4655 adapter->stats.latecol += rd32(E1000_LATECOL);
4656 adapter->stats.dc += rd32(E1000_DC);
4657 adapter->stats.rlec += rd32(E1000_RLEC);
4658 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4659 adapter->stats.xontxc += rd32(E1000_XONTXC);
4660 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4661 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4662 adapter->stats.fcruc += rd32(E1000_FCRUC);
4663 adapter->stats.gptc += rd32(E1000_GPTC);
4664 adapter->stats.gotc += rd32(E1000_GOTCL);
4665 rd32(E1000_GOTCH); /* clear GOTCL */
4666 adapter->stats.rnbc += rd32(E1000_RNBC);
4667 adapter->stats.ruc += rd32(E1000_RUC);
4668 adapter->stats.rfc += rd32(E1000_RFC);
4669 adapter->stats.rjc += rd32(E1000_RJC);
4670 adapter->stats.tor += rd32(E1000_TORH);
4671 adapter->stats.tot += rd32(E1000_TOTH);
4672 adapter->stats.tpr += rd32(E1000_TPR);
4674 adapter->stats.ptc64 += rd32(E1000_PTC64);
4675 adapter->stats.ptc127 += rd32(E1000_PTC127);
4676 adapter->stats.ptc255 += rd32(E1000_PTC255);
4677 adapter->stats.ptc511 += rd32(E1000_PTC511);
4678 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4679 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4681 adapter->stats.mptc += rd32(E1000_MPTC);
4682 adapter->stats.bptc += rd32(E1000_BPTC);
4684 adapter->stats.tpt += rd32(E1000_TPT);
4685 adapter->stats.colc += rd32(E1000_COLC);
4687 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4688 /* read internal phy specific stats */
4689 reg = rd32(E1000_CTRL_EXT);
4690 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4691 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4692 adapter->stats.tncrs += rd32(E1000_TNCRS);
4695 adapter->stats.tsctc += rd32(E1000_TSCTC);
4696 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4698 adapter->stats.iac += rd32(E1000_IAC);
4699 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4700 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4701 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4702 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4703 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4704 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4705 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4706 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4708 /* Fill out the OS statistics structure */
4709 net_stats->multicast = adapter->stats.mprc;
4710 net_stats->collisions = adapter->stats.colc;
4712 /* Rx Errors */
4714 /* RLEC on some newer hardware can be incorrect so build
4715 * our own version based on RUC and ROC */
4716 net_stats->rx_errors = adapter->stats.rxerrc +
4717 adapter->stats.crcerrs + adapter->stats.algnerrc +
4718 adapter->stats.ruc + adapter->stats.roc +
4719 adapter->stats.cexterr;
4720 net_stats->rx_length_errors = adapter->stats.ruc +
4721 adapter->stats.roc;
4722 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4723 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4724 net_stats->rx_missed_errors = adapter->stats.mpc;
4726 /* Tx Errors */
4727 net_stats->tx_errors = adapter->stats.ecol +
4728 adapter->stats.latecol;
4729 net_stats->tx_aborted_errors = adapter->stats.ecol;
4730 net_stats->tx_window_errors = adapter->stats.latecol;
4731 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4733 /* Tx Dropped needs to be maintained elsewhere */
4735 /* Phy Stats */
4736 if (hw->phy.media_type == e1000_media_type_copper) {
4737 if ((adapter->link_speed == SPEED_1000) &&
4738 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4739 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4740 adapter->phy_stats.idle_errors += phy_tmp;
4744 /* Management Stats */
4745 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4746 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4747 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4749 /* OS2BMC Stats */
4750 reg = rd32(E1000_MANC);
4751 if (reg & E1000_MANC_EN_BMC2OS) {
4752 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4753 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4754 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4755 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4759 static irqreturn_t igb_msix_other(int irq, void *data)
4761 struct igb_adapter *adapter = data;
4762 struct e1000_hw *hw = &adapter->hw;
4763 u32 icr = rd32(E1000_ICR);
4764 /* reading ICR causes bit 31 of EICR to be cleared */
4766 if (icr & E1000_ICR_DRSTA)
4767 schedule_work(&adapter->reset_task);
4769 if (icr & E1000_ICR_DOUTSYNC) {
4770 /* HW is reporting DMA is out of sync */
4771 adapter->stats.doosync++;
4772 /* The DMA Out of Sync is also an indication of a spoof event
4773 * in IOV mode. Check the Wrong VM Behavior register to
4774 * see if it is really a spoof event. */
4775 igb_check_wvbr(adapter);
4778 /* Check for a mailbox event */
4779 if (icr & E1000_ICR_VMMB)
4780 igb_msg_task(adapter);
4782 if (icr & E1000_ICR_LSC) {
4783 hw->mac.get_link_status = 1;
4784 /* guard against interrupt when we're going down */
4785 if (!test_bit(__IGB_DOWN, &adapter->state))
4786 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4789 wr32(E1000_EIMS, adapter->eims_other);
4791 return IRQ_HANDLED;
4794 static void igb_write_itr(struct igb_q_vector *q_vector)
4796 struct igb_adapter *adapter = q_vector->adapter;
4797 u32 itr_val = q_vector->itr_val & 0x7FFC;
4799 if (!q_vector->set_itr)
4800 return;
4802 if (!itr_val)
4803 itr_val = 0x4;
4805 if (adapter->hw.mac.type == e1000_82575)
4806 itr_val |= itr_val << 16;
4807 else
4808 itr_val |= E1000_EITR_CNT_IGNR;
4810 writel(itr_val, q_vector->itr_register);
4811 q_vector->set_itr = 0;
4814 static irqreturn_t igb_msix_ring(int irq, void *data)
4816 struct igb_q_vector *q_vector = data;
4818 /* Write the ITR value calculated from the previous interrupt. */
4819 igb_write_itr(q_vector);
4821 napi_schedule(&q_vector->napi);
4823 return IRQ_HANDLED;
4826 #ifdef CONFIG_IGB_DCA
4827 static void igb_update_dca(struct igb_q_vector *q_vector)
4829 struct igb_adapter *adapter = q_vector->adapter;
4830 struct e1000_hw *hw = &adapter->hw;
4831 int cpu = get_cpu();
4833 if (q_vector->cpu == cpu)
4834 goto out_no_update;
4836 if (q_vector->tx.ring) {
4837 int q = q_vector->tx.ring->reg_idx;
4838 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4839 if (hw->mac.type == e1000_82575) {
4840 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4841 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4842 } else {
4843 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4844 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4845 E1000_DCA_TXCTRL_CPUID_SHIFT;
4847 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4848 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4850 if (q_vector->rx.ring) {
4851 int q = q_vector->rx.ring->reg_idx;
4852 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4853 if (hw->mac.type == e1000_82575) {
4854 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4855 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4856 } else {
4857 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4858 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4859 E1000_DCA_RXCTRL_CPUID_SHIFT;
4861 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4862 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4863 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4864 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4866 q_vector->cpu = cpu;
4867 out_no_update:
4868 put_cpu();
4871 static void igb_setup_dca(struct igb_adapter *adapter)
4873 struct e1000_hw *hw = &adapter->hw;
4874 int i;
4876 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4877 return;
4879 /* Always use CB2 mode, difference is masked in the CB driver. */
4880 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4882 for (i = 0; i < adapter->num_q_vectors; i++) {
4883 adapter->q_vector[i]->cpu = -1;
4884 igb_update_dca(adapter->q_vector[i]);
4888 static int __igb_notify_dca(struct device *dev, void *data)
4890 struct net_device *netdev = dev_get_drvdata(dev);
4891 struct igb_adapter *adapter = netdev_priv(netdev);
4892 struct pci_dev *pdev = adapter->pdev;
4893 struct e1000_hw *hw = &adapter->hw;
4894 unsigned long event = *(unsigned long *)data;
4896 switch (event) {
4897 case DCA_PROVIDER_ADD:
4898 /* if already enabled, don't do it again */
4899 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4900 break;
4901 if (dca_add_requester(dev) == 0) {
4902 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4903 dev_info(&pdev->dev, "DCA enabled\n");
4904 igb_setup_dca(adapter);
4905 break;
4907 /* Fall Through since DCA is disabled. */
4908 case DCA_PROVIDER_REMOVE:
4909 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4910 /* without this a class_device is left
4911 * hanging around in the sysfs model */
4912 dca_remove_requester(dev);
4913 dev_info(&pdev->dev, "DCA disabled\n");
4914 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4915 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4917 break;
4920 return 0;
4923 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4924 void *p)
4926 int ret_val;
4928 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4929 __igb_notify_dca);
4931 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4933 #endif /* CONFIG_IGB_DCA */
4935 #ifdef CONFIG_PCI_IOV
4936 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4938 unsigned char mac_addr[ETH_ALEN];
4939 struct pci_dev *pdev = adapter->pdev;
4940 struct e1000_hw *hw = &adapter->hw;
4941 struct pci_dev *pvfdev;
4942 unsigned int device_id;
4943 u16 thisvf_devfn;
4945 random_ether_addr(mac_addr);
4946 igb_set_vf_mac(adapter, vf, mac_addr);
4948 switch (adapter->hw.mac.type) {
4949 case e1000_82576:
4950 device_id = IGB_82576_VF_DEV_ID;
4951 /* VF Stride for 82576 is 2 */
4952 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4953 (pdev->devfn & 1);
4954 break;
4955 case e1000_i350:
4956 device_id = IGB_I350_VF_DEV_ID;
4957 /* VF Stride for I350 is 4 */
4958 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4959 (pdev->devfn & 3);
4960 break;
4961 default:
4962 device_id = 0;
4963 thisvf_devfn = 0;
4964 break;
4967 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4968 while (pvfdev) {
4969 if (pvfdev->devfn == thisvf_devfn)
4970 break;
4971 pvfdev = pci_get_device(hw->vendor_id,
4972 device_id, pvfdev);
4975 if (pvfdev)
4976 adapter->vf_data[vf].vfdev = pvfdev;
4977 else
4978 dev_err(&pdev->dev,
4979 "Couldn't find pci dev ptr for VF %4.4x\n",
4980 thisvf_devfn);
4981 return pvfdev != NULL;
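/* Worked example (illustrative, not part of the driver): with an 82576
 * PF at devfn 0x00, VF 3 is expected at devfn 0x80 + (3 << 1) = 0x86;
 * on an i350, whose VF stride is 4, the same VF index lands at
 * 0x80 + (3 << 2) = 0x8c.  The loop above then walks the devices with
 * the matching VF device ID until it finds that devfn.
 */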
4984 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4986 struct e1000_hw *hw = &adapter->hw;
4987 struct pci_dev *pdev = adapter->pdev;
4988 struct pci_dev *pvfdev;
4989 u16 vf_devfn = 0;
4990 u16 vf_stride;
4991 unsigned int device_id;
4992 int vfs_found = 0;
4994 switch (adapter->hw.mac.type) {
4995 case e1000_82576:
4996 device_id = IGB_82576_VF_DEV_ID;
4997 /* VF Stride for 82576 is 2 */
4998 vf_stride = 2;
4999 break;
5000 case e1000_i350:
5001 device_id = IGB_I350_VF_DEV_ID;
5002 /* VF Stride for I350 is 4 */
5003 vf_stride = 4;
5004 break;
5005 default:
5006 device_id = 0;
5007 vf_stride = 0;
5008 break;
5011 vf_devfn = pdev->devfn + 0x80;
5012 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5013 while (pvfdev) {
5014 if (pvfdev->devfn == vf_devfn)
5015 vfs_found++;
5016 vf_devfn += vf_stride;
5017 pvfdev = pci_get_device(hw->vendor_id,
5018 device_id, pvfdev);
5021 return vfs_found;
5024 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5026 int i;
5027 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5028 if (adapter->vf_data[i].vfdev) {
5029 if (adapter->vf_data[i].vfdev->dev_flags &
5030 PCI_DEV_FLAGS_ASSIGNED)
5031 return true;
5034 return false;
5037 #endif
5038 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5040 struct e1000_hw *hw = &adapter->hw;
5041 u32 ping;
5042 int i;
5044 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5045 ping = E1000_PF_CONTROL_MSG;
5046 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5047 ping |= E1000_VT_MSGTYPE_CTS;
5048 igb_write_mbx(hw, &ping, 1, i);
5052 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5054 struct e1000_hw *hw = &adapter->hw;
5055 u32 vmolr = rd32(E1000_VMOLR(vf));
5056 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5058 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5059 IGB_VF_FLAG_MULTI_PROMISC);
5060 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5062 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5063 vmolr |= E1000_VMOLR_MPME;
5064 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5065 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5066 } else {
5068 * if we have hashes and we are clearing a multicast promisc
5069 * flag we need to write the hashes to the MTA as this step
5070 * was previously skipped
5072 if (vf_data->num_vf_mc_hashes > 30) {
5073 vmolr |= E1000_VMOLR_MPME;
5074 } else if (vf_data->num_vf_mc_hashes) {
5075 int j;
5076 vmolr |= E1000_VMOLR_ROMPE;
5077 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5078 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5082 wr32(E1000_VMOLR(vf), vmolr);
5084 /* there are flags left unprocessed, likely not supported */
5085 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5086 return -EINVAL;
5088 return 0;
5092 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5093 u32 *msgbuf, u32 vf)
5095 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5096 u16 *hash_list = (u16 *)&msgbuf[1];
5097 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5098 int i;
5100 /* salt away the number of multicast addresses assigned
5101 * to this VF for later use to restore when the PF multicast
5102 * list changes
5104 vf_data->num_vf_mc_hashes = n;
5106 /* only up to 30 hash values supported */
5107 if (n > 30)
5108 n = 30;
5110 /* store the hashes for later use */
5111 for (i = 0; i < n; i++)
5112 vf_data->vf_mc_hashes[i] = hash_list[i];
5114 /* Flush and reset the mta with the new values */
5115 igb_set_rx_mode(adapter->netdev);
5117 return 0;
5120 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5122 struct e1000_hw *hw = &adapter->hw;
5123 struct vf_data_storage *vf_data;
5124 int i, j;
5126 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5127 u32 vmolr = rd32(E1000_VMOLR(i));
5128 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5130 vf_data = &adapter->vf_data[i];
5132 if ((vf_data->num_vf_mc_hashes > 30) ||
5133 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5134 vmolr |= E1000_VMOLR_MPME;
5135 } else if (vf_data->num_vf_mc_hashes) {
5136 vmolr |= E1000_VMOLR_ROMPE;
5137 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5138 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5140 wr32(E1000_VMOLR(i), vmolr);
5144 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5146 struct e1000_hw *hw = &adapter->hw;
5147 u32 pool_mask, reg, vid;
5148 int i;
5150 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5152 /* Find the vlan filter for this id */
5153 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5154 reg = rd32(E1000_VLVF(i));
5156 /* remove the vf from the pool */
5157 reg &= ~pool_mask;
5159 /* if pool is empty then remove entry from vfta */
5160 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5161 (reg & E1000_VLVF_VLANID_ENABLE)) {
5162 reg = 0;
5163 vid = reg & E1000_VLVF_VLANID_MASK;
5164 igb_vfta_set(hw, vid, false);
5167 wr32(E1000_VLVF(i), reg);
5170 adapter->vf_data[vf].vlans_enabled = 0;
5173 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5175 struct e1000_hw *hw = &adapter->hw;
5176 u32 reg, i;
5178 /* The vlvf table only exists on 82576 hardware and newer */
5179 if (hw->mac.type < e1000_82576)
5180 return -1;
5182 /* we only need to do this if VMDq is enabled */
5183 if (!adapter->vfs_allocated_count)
5184 return -1;
5186 /* Find the vlan filter for this id */
5187 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5188 reg = rd32(E1000_VLVF(i));
5189 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5190 vid == (reg & E1000_VLVF_VLANID_MASK))
5191 break;
5194 if (add) {
5195 if (i == E1000_VLVF_ARRAY_SIZE) {
5196 /* Did not find a matching VLAN ID entry that was
5197 * enabled. Search for a free filter entry, i.e.
5198 * one without the enable bit set
5200 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5201 reg = rd32(E1000_VLVF(i));
5202 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5203 break;
5206 if (i < E1000_VLVF_ARRAY_SIZE) {
5207 /* Found an enabled/available entry */
5208 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5210 /* if !enabled we need to set this up in vfta */
5211 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5212 /* add VID to filter table */
5213 igb_vfta_set(hw, vid, true);
5214 reg |= E1000_VLVF_VLANID_ENABLE;
5216 reg &= ~E1000_VLVF_VLANID_MASK;
5217 reg |= vid;
5218 wr32(E1000_VLVF(i), reg);
5220 /* do not modify RLPML for PF devices */
5221 if (vf >= adapter->vfs_allocated_count)
5222 return 0;
5224 if (!adapter->vf_data[vf].vlans_enabled) {
5225 u32 size;
5226 reg = rd32(E1000_VMOLR(vf));
5227 size = reg & E1000_VMOLR_RLPML_MASK;
5228 size += 4;
5229 reg &= ~E1000_VMOLR_RLPML_MASK;
5230 reg |= size;
5231 wr32(E1000_VMOLR(vf), reg);
5234 adapter->vf_data[vf].vlans_enabled++;
5236 } else {
5237 if (i < E1000_VLVF_ARRAY_SIZE) {
5238 /* remove vf from the pool */
5239 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5240 /* if pool is empty then remove entry from vfta */
5241 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5242 reg = 0;
5243 igb_vfta_set(hw, vid, false);
5245 wr32(E1000_VLVF(i), reg);
5247 /* do not modify RLPML for PF devices */
5248 if (vf >= adapter->vfs_allocated_count)
5249 return 0;
5251 adapter->vf_data[vf].vlans_enabled--;
5252 if (!adapter->vf_data[vf].vlans_enabled) {
5253 u32 size;
5254 reg = rd32(E1000_VMOLR(vf));
5255 size = reg & E1000_VMOLR_RLPML_MASK;
5256 size -= 4;
5257 reg &= ~E1000_VMOLR_RLPML_MASK;
5258 reg |= size;
5259 wr32(E1000_VMOLR(vf), reg);
5263 return 0;
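/* Note (illustrative, not part of the driver): the RLPML adjustment
 * above grows each VF's maximum receive packet length by 4 bytes the
 * first time a VLAN is enabled for it, making room for the 802.1Q tag,
 * and shrinks it again once the VF's last VLAN is removed.
 */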
5266 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5268 struct e1000_hw *hw = &adapter->hw;
5270 if (vid)
5271 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5272 else
5273 wr32(E1000_VMVIR(vf), 0);
5276 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5277 int vf, u16 vlan, u8 qos)
5279 int err = 0;
5280 struct igb_adapter *adapter = netdev_priv(netdev);
5282 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5283 return -EINVAL;
5284 if (vlan || qos) {
5285 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5286 if (err)
5287 goto out;
5288 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5289 igb_set_vmolr(adapter, vf, !vlan);
5290 adapter->vf_data[vf].pf_vlan = vlan;
5291 adapter->vf_data[vf].pf_qos = qos;
5292 dev_info(&adapter->pdev->dev,
5293 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5294 if (test_bit(__IGB_DOWN, &adapter->state)) {
5295 dev_warn(&adapter->pdev->dev,
5296 "The VF VLAN has been set,"
5297 " but the PF device is not up.\n");
5298 dev_warn(&adapter->pdev->dev,
5299 "Bring the PF device up before"
5300 " attempting to use the VF device.\n");
5302 } else {
5303 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5304 false, vf);
5305 igb_set_vmvir(adapter, vlan, vf);
5306 igb_set_vmolr(adapter, vf, true);
5307 adapter->vf_data[vf].pf_vlan = 0;
5308 adapter->vf_data[vf].pf_qos = 0;
5310 out:
5311 return err;
5314 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5316 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5317 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5319 return igb_vlvf_set(adapter, vid, add, vf);
5322 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5324 /* clear flags - except flag that indicates PF has set the MAC */
5325 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5326 adapter->vf_data[vf].last_nack = jiffies;
5328 /* reset offloads to defaults */
5329 igb_set_vmolr(adapter, vf, true);
5331 /* reset vlans for device */
5332 igb_clear_vf_vfta(adapter, vf);
5333 if (adapter->vf_data[vf].pf_vlan)
5334 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5335 adapter->vf_data[vf].pf_vlan,
5336 adapter->vf_data[vf].pf_qos);
5337 else
5338 igb_clear_vf_vfta(adapter, vf);
5340 /* reset multicast table array for vf */
5341 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5343 /* Flush and reset the mta with the new values */
5344 igb_set_rx_mode(adapter->netdev);
5347 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5349 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5351 /* generate a new mac address as we were hotplug removed/added */
5352 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5353 random_ether_addr(vf_mac);
5355 /* process remaining reset events */
5356 igb_vf_reset(adapter, vf);
5359 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5361 struct e1000_hw *hw = &adapter->hw;
5362 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5363 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5364 u32 reg, msgbuf[3];
5365 u8 *addr = (u8 *)(&msgbuf[1]);
5367 /* process all the same items cleared in a function level reset */
5368 igb_vf_reset(adapter, vf);
5370 /* set vf mac address */
5371 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5373 /* enable transmit and receive for vf */
5374 reg = rd32(E1000_VFTE);
5375 wr32(E1000_VFTE, reg | (1 << vf));
5376 reg = rd32(E1000_VFRE);
5377 wr32(E1000_VFRE, reg | (1 << vf));
5379 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5381 /* reply to reset with ack and vf mac address */
5382 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5383 memcpy(addr, vf_mac, 6);
5384 igb_write_mbx(hw, msgbuf, 3, vf);
5387 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5390 * The VF MAC Address is stored in a packed array of bytes
5391 * starting at the second 32 bit word of the msg array
5393 unsigned char *addr = (char *)&msg[1];
5394 int err = -1;
5396 if (is_valid_ether_addr(addr))
5397 err = igb_set_vf_mac(adapter, vf, addr);
5399 return err;
5402 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5404 struct e1000_hw *hw = &adapter->hw;
5405 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5406 u32 msg = E1000_VT_MSGTYPE_NACK;
5408 /* if device isn't clear to send it shouldn't be reading either */
5409 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5410 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5411 igb_write_mbx(hw, &msg, 1, vf);
5412 vf_data->last_nack = jiffies;
5416 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5418 struct pci_dev *pdev = adapter->pdev;
5419 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5420 struct e1000_hw *hw = &adapter->hw;
5421 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5422 s32 retval;
5424 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5426 if (retval) {
5427 /* if receive failed revoke VF CTS stats and restart init */
5428 dev_err(&pdev->dev, "Error receiving message from VF\n");
5429 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5430 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5431 return;
5432 goto out;
5435 /* this is a message we already processed, do nothing */
5436 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5437 return;
5440 * until the vf completes a reset it should not be
5441 * allowed to start any configuration.
5444 if (msgbuf[0] == E1000_VF_RESET) {
5445 igb_vf_reset_msg(adapter, vf);
5446 return;
5449 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5450 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5451 return;
5452 retval = -1;
5453 goto out;
5456 switch ((msgbuf[0] & 0xFFFF)) {
5457 case E1000_VF_SET_MAC_ADDR:
5458 retval = -EINVAL;
5459 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5460 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5461 else
5462 dev_warn(&pdev->dev,
5463 "VF %d attempted to override administratively "
5464 "set MAC address\nReload the VF driver to "
5465 "resume operations\n", vf);
5466 break;
5467 case E1000_VF_SET_PROMISC:
5468 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5469 break;
5470 case E1000_VF_SET_MULTICAST:
5471 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5472 break;
5473 case E1000_VF_SET_LPE:
5474 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5475 break;
5476 case E1000_VF_SET_VLAN:
5477 retval = -1;
5478 if (vf_data->pf_vlan)
5479 dev_warn(&pdev->dev,
5480 "VF %d attempted to override administratively "
5481 "set VLAN tag\nReload the VF driver to "
5482 "resume operations\n", vf);
5483 else
5484 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5485 break;
5486 default:
5487 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5488 retval = -1;
5489 break;
5492 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5493 out:
5494 /* notify the VF of the results of what it sent us */
5495 if (retval)
5496 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5497 else
5498 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5500 igb_write_mbx(hw, msgbuf, 1, vf);
5503 static void igb_msg_task(struct igb_adapter *adapter)
5505 struct e1000_hw *hw = &adapter->hw;
5506 u32 vf;
5508 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5509 /* process any reset requests */
5510 if (!igb_check_for_rst(hw, vf))
5511 igb_vf_reset_event(adapter, vf);
5513 /* process any messages pending */
5514 if (!igb_check_for_msg(hw, vf))
5515 igb_rcv_msg_from_vf(adapter, vf);
5517 /* process any acks */
5518 if (!igb_check_for_ack(hw, vf))
5519 igb_rcv_ack_from_vf(adapter, vf);
5524 * igb_set_uta - Set unicast filter table address
5525 * @adapter: board private structure
5527 * The unicast table address is a register array of 32-bit registers.
5528 * The table is meant to be used in a way similar to how the MTA is used;
5529 * however, due to certain limitations in the hardware it is necessary to
5530 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5531 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5533 static void igb_set_uta(struct igb_adapter *adapter)
5535 struct e1000_hw *hw = &adapter->hw;
5536 int i;
5538 /* The UTA table only exists on 82576 hardware and newer */
5539 if (hw->mac.type < e1000_82576)
5540 return;
5542 /* we only need to do this if VMDq is enabled */
5543 if (!adapter->vfs_allocated_count)
5544 return;
5546 for (i = 0; i < hw->mac.uta_reg_count; i++)
5547 array_wr32(E1000_UTA, i, ~0);
5551 * igb_intr_msi - Interrupt Handler
5552 * @irq: interrupt number
5553 * @data: pointer to a network interface device structure
5555 static irqreturn_t igb_intr_msi(int irq, void *data)
5557 struct igb_adapter *adapter = data;
5558 struct igb_q_vector *q_vector = adapter->q_vector[0];
5559 struct e1000_hw *hw = &adapter->hw;
5560 /* read ICR disables interrupts using IAM */
5561 u32 icr = rd32(E1000_ICR);
5563 igb_write_itr(q_vector);
5565 if (icr & E1000_ICR_DRSTA)
5566 schedule_work(&adapter->reset_task);
5568 if (icr & E1000_ICR_DOUTSYNC) {
5569 /* HW is reporting DMA is out of sync */
5570 adapter->stats.doosync++;
5573 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5574 hw->mac.get_link_status = 1;
5575 if (!test_bit(__IGB_DOWN, &adapter->state))
5576 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5579 napi_schedule(&q_vector->napi);
5581 return IRQ_HANDLED;
5585 * igb_intr - Legacy Interrupt Handler
5586 * @irq: interrupt number
5587 * @data: pointer to a network interface device structure
5589 static irqreturn_t igb_intr(int irq, void *data)
5591 struct igb_adapter *adapter = data;
5592 struct igb_q_vector *q_vector = adapter->q_vector[0];
5593 struct e1000_hw *hw = &adapter->hw;
5594 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5595 * need for the IMC write */
5596 u32 icr = rd32(E1000_ICR);
5598 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5599 * not set, then the adapter didn't send an interrupt */
5600 if (!(icr & E1000_ICR_INT_ASSERTED))
5601 return IRQ_NONE;
5603 igb_write_itr(q_vector);
5605 if (icr & E1000_ICR_DRSTA)
5606 schedule_work(&adapter->reset_task);
5608 if (icr & E1000_ICR_DOUTSYNC) {
5609 /* HW is reporting DMA is out of sync */
5610 adapter->stats.doosync++;
5613 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5614 hw->mac.get_link_status = 1;
5615 /* guard against interrupt when we're going down */
5616 if (!test_bit(__IGB_DOWN, &adapter->state))
5617 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5620 napi_schedule(&q_vector->napi);
5622 return IRQ_HANDLED;
5625 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5627 struct igb_adapter *adapter = q_vector->adapter;
5628 struct e1000_hw *hw = &adapter->hw;
5630 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5631 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5632 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5633 igb_set_itr(q_vector);
5634 else
5635 igb_update_ring_itr(q_vector);
5638 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5639 if (adapter->msix_entries)
5640 wr32(E1000_EIMS, q_vector->eims_value);
5641 else
5642 igb_irq_enable(adapter);
5647 * igb_poll - NAPI Rx polling callback
5648 * @napi: napi polling structure
5649 * @budget: count of how many packets we should handle
5651 static int igb_poll(struct napi_struct *napi, int budget)
5653 struct igb_q_vector *q_vector = container_of(napi,
5654 struct igb_q_vector,
5655 napi);
5656 bool clean_complete = true;
5658 #ifdef CONFIG_IGB_DCA
5659 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5660 igb_update_dca(q_vector);
5661 #endif
5662 if (q_vector->tx.ring)
5663 clean_complete = igb_clean_tx_irq(q_vector);
5665 if (q_vector->rx.ring)
5666 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5668 /* If all work not completed, return budget and keep polling */
5669 if (!clean_complete)
5670 return budget;
5672 /* If not enough Rx work done, exit the polling mode */
5673 napi_complete(napi);
5674 igb_ring_irq_enable(q_vector);
5676 return 0;
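/* Note (illustrative, not part of the driver): igb_poll() follows the
 * usual NAPI contract: while Tx or Rx work remains it returns the full
 * budget so the core keeps polling with the interrupt left masked; once
 * both cleanups report completion it calls napi_complete(), re-enables
 * the queue's interrupt via igb_ring_irq_enable() and returns 0.
 */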
5680 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5681 * @adapter: board private structure
5682 * @shhwtstamps: timestamp structure to update
5683 * @regval: unsigned 64bit system time value.
5685 * We need to convert the system time value stored in the RX/TXSTMP registers
5686 * into a hwtstamp which can be used by the upper level timestamping functions
5688 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5689 struct skb_shared_hwtstamps *shhwtstamps,
5690 u64 regval)
5692 u64 ns;
5695 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5696 * 24 to match the clock shift we set up earlier.
5698 if (adapter->hw.mac.type >= e1000_82580)
5699 regval <<= IGB_82580_TSYNC_SHIFT;
5701 ns = timecounter_cyc2time(&adapter->clock, regval);
5702 timecompare_update(&adapter->compare, ns);
5703 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5704 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5705 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5709 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5710 * @q_vector: pointer to q_vector containing needed info
5711 * @buffer: pointer to igb_tx_buffer structure
5713 * If we were asked to do hardware stamping and such a time stamp is
5714 * available, then it must have been for this skb here because we only
5715 * allow one such packet into the queue.
5717 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5718 struct igb_tx_buffer *buffer_info)
5720 struct igb_adapter *adapter = q_vector->adapter;
5721 struct e1000_hw *hw = &adapter->hw;
5722 struct skb_shared_hwtstamps shhwtstamps;
5723 u64 regval;
5725 /* if skb does not support hw timestamp or TX stamp not valid exit */
5726 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5727 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5728 return;
5730 regval = rd32(E1000_TXSTMPL);
5731 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5733 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5734 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5738 * igb_clean_tx_irq - Reclaim resources after transmit completes
5739 * @q_vector: pointer to q_vector containing needed info
5740 * returns true if ring is completely cleaned
5742 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5744 struct igb_adapter *adapter = q_vector->adapter;
5745 struct igb_ring *tx_ring = q_vector->tx.ring;
5746 struct igb_tx_buffer *tx_buffer;
5747 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5748 unsigned int total_bytes = 0, total_packets = 0;
5749 unsigned int budget = q_vector->tx.work_limit;
5750 unsigned int i = tx_ring->next_to_clean;
5752 if (test_bit(__IGB_DOWN, &adapter->state))
5753 return true;
5755 tx_buffer = &tx_ring->tx_buffer_info[i];
5756 tx_desc = IGB_TX_DESC(tx_ring, i);
5757 i -= tx_ring->count;
5759 for (; budget; budget--) {
5760 eop_desc = tx_buffer->next_to_watch;
5762 /* prevent any other reads prior to eop_desc */
5763 rmb();
5765 /* if next_to_watch is not set then there is no work pending */
5766 if (!eop_desc)
5767 break;
5769 /* if DD is not set pending work has not been completed */
5770 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5771 break;
5773 /* clear next_to_watch to prevent false hangs */
5774 tx_buffer->next_to_watch = NULL;
5776 /* update the statistics for this packet */
5777 total_bytes += tx_buffer->bytecount;
5778 total_packets += tx_buffer->gso_segs;
5780 /* retrieve hardware timestamp */
5781 igb_tx_hwtstamp(q_vector, tx_buffer);
5783 /* free the skb */
5784 dev_kfree_skb_any(tx_buffer->skb);
5785 tx_buffer->skb = NULL;
5787 /* unmap skb header data */
5788 dma_unmap_single(tx_ring->dev,
5789 tx_buffer->dma,
5790 tx_buffer->length,
5791 DMA_TO_DEVICE);
5793 /* clear last DMA location and unmap remaining buffers */
5794 while (tx_desc != eop_desc) {
5795 tx_buffer->dma = 0;
5797 tx_buffer++;
5798 tx_desc++;
5799 i++;
5800 if (unlikely(!i)) {
5801 i -= tx_ring->count;
5802 tx_buffer = tx_ring->tx_buffer_info;
5803 tx_desc = IGB_TX_DESC(tx_ring, 0);
5806 /* unmap any remaining paged data */
5807 if (tx_buffer->dma) {
5808 dma_unmap_page(tx_ring->dev,
5809 tx_buffer->dma,
5810 tx_buffer->length,
5811 DMA_TO_DEVICE);
5815 /* clear last DMA location */
5816 tx_buffer->dma = 0;
5818 /* move us one more past the eop_desc for start of next pkt */
5819 tx_buffer++;
5820 tx_desc++;
5821 i++;
5822 if (unlikely(!i)) {
5823 i -= tx_ring->count;
5824 tx_buffer = tx_ring->tx_buffer_info;
5825 tx_desc = IGB_TX_DESC(tx_ring, 0);
5829 netdev_tx_completed_queue(txring_txq(tx_ring),
5830 total_packets, total_bytes);
5831 i += tx_ring->count;
5832 tx_ring->next_to_clean = i;
5833 u64_stats_update_begin(&tx_ring->tx_syncp);
5834 tx_ring->tx_stats.bytes += total_bytes;
5835 tx_ring->tx_stats.packets += total_packets;
5836 u64_stats_update_end(&tx_ring->tx_syncp);
5837 q_vector->tx.total_bytes += total_bytes;
5838 q_vector->tx.total_packets += total_packets;
5840 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5841 struct e1000_hw *hw = &adapter->hw;
5843 eop_desc = tx_buffer->next_to_watch;
5845 /* Detect a transmit hang in hardware; this serializes the
5846 * check with the clearing of time_stamp and the movement of i */
5847 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5848 if (eop_desc &&
5849 time_after(jiffies, tx_buffer->time_stamp +
5850 (adapter->tx_timeout_factor * HZ)) &&
5851 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5853 /* detected Tx unit hang */
5854 dev_err(tx_ring->dev,
5855 "Detected Tx Unit Hang\n"
5856 " Tx Queue <%d>\n"
5857 " TDH <%x>\n"
5858 " TDT <%x>\n"
5859 " next_to_use <%x>\n"
5860 " next_to_clean <%x>\n"
5861 "buffer_info[next_to_clean]\n"
5862 " time_stamp <%lx>\n"
5863 " next_to_watch <%p>\n"
5864 " jiffies <%lx>\n"
5865 " desc.status <%x>\n",
5866 tx_ring->queue_index,
5867 rd32(E1000_TDH(tx_ring->reg_idx)),
5868 readl(tx_ring->tail),
5869 tx_ring->next_to_use,
5870 tx_ring->next_to_clean,
5871 tx_buffer->time_stamp,
5872 eop_desc,
5873 jiffies,
5874 eop_desc->wb.status);
5875 netif_stop_subqueue(tx_ring->netdev,
5876 tx_ring->queue_index);
5878 /* we are about to reset, no point in enabling stuff */
5879 return true;
5883 if (unlikely(total_packets &&
5884 netif_carrier_ok(tx_ring->netdev) &&
5885 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5886 /* Make sure that anybody stopping the queue after this
5887 * sees the new next_to_clean.
5889 smp_mb();
5890 if (__netif_subqueue_stopped(tx_ring->netdev,
5891 tx_ring->queue_index) &&
5892 !(test_bit(__IGB_DOWN, &adapter->state))) {
5893 netif_wake_subqueue(tx_ring->netdev,
5894 tx_ring->queue_index);
5896 u64_stats_update_begin(&tx_ring->tx_syncp);
5897 tx_ring->tx_stats.restart_queue++;
5898 u64_stats_update_end(&tx_ring->tx_syncp);
5902 return !!budget;
5905 static inline void igb_rx_checksum(struct igb_ring *ring,
5906 union e1000_adv_rx_desc *rx_desc,
5907 struct sk_buff *skb)
5909 skb_checksum_none_assert(skb);
5911 /* Ignore Checksum bit is set */
5912 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5913 return;
5915 /* Rx checksum disabled via ethtool */
5916 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5917 return;
5919 /* TCP/UDP checksum error bit is set */
5920 if (igb_test_staterr(rx_desc,
5921 E1000_RXDEXT_STATERR_TCPE |
5922 E1000_RXDEXT_STATERR_IPE)) {
5924 * work around an erratum with SCTP packets where the TCPE (aka
5925 * L4E) bit is set incorrectly on 64 byte (60 byte w/o CRC)
5926 * packets, i.e. let the stack verify the crc32c
5928 if (!((skb->len == 60) &&
5929 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5930 u64_stats_update_begin(&ring->rx_syncp);
5931 ring->rx_stats.csum_err++;
5932 u64_stats_update_end(&ring->rx_syncp);
5934 /* let the stack verify checksum errors */
5935 return;
5937 /* It must be a TCP or UDP packet with a valid checksum */
5938 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5939 E1000_RXD_STAT_UDPCS))
5940 skb->ip_summed = CHECKSUM_UNNECESSARY;
5942 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5943 le32_to_cpu(rx_desc->wb.upper.status_error));
5946 static inline void igb_rx_hash(struct igb_ring *ring,
5947 union e1000_adv_rx_desc *rx_desc,
5948 struct sk_buff *skb)
5950 if (ring->netdev->features & NETIF_F_RXHASH)
5951 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5954 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5955 union e1000_adv_rx_desc *rx_desc,
5956 struct sk_buff *skb)
5958 struct igb_adapter *adapter = q_vector->adapter;
5959 struct e1000_hw *hw = &adapter->hw;
5960 u64 regval;
5962 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5963 E1000_RXDADV_STAT_TS))
5964 return;
5967 * If this bit is set, then the RX registers contain the time stamp. No
5968 * other packet will be time stamped until we read these registers, so
5969 * read the registers to make them available again. Because only one
5970 * packet can be time stamped at a time, we know that the register
5971 * values must belong to this one here and therefore we don't need to
5972 * compare any of the additional attributes stored for it.
5974 * If nothing went wrong, then it should have a shared tx_flags that we
5975 * can turn into a skb_shared_hwtstamps.
5977 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5978 u32 *stamp = (u32 *)skb->data;
5979 regval = le32_to_cpu(*(stamp + 2));
5980 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5981 skb_pull(skb, IGB_TS_HDR_LEN);
5982 } else {
5983 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5984 return;
5986 regval = rd32(E1000_RXSTMPL);
5987 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5990 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5993 static void igb_rx_vlan(struct igb_ring *ring,
5994 union e1000_adv_rx_desc *rx_desc,
5995 struct sk_buff *skb)
5997 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5998 u16 vid;
5999 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6000 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6001 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6002 else
6003 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6005 __vlan_hwaccel_put_tag(skb, vid);
6009 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6011 /* HW will not DMA in data larger than the given buffer, even if it
6012 * parses the (NFS, of course) header to be larger. In that case, it
6013 * fills the header buffer and spills the rest into the page.
6015 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6016 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6017 if (hlen > IGB_RX_HDR_LEN)
6018 hlen = IGB_RX_HDR_LEN;
6019 return hlen;
6022 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6024 struct igb_ring *rx_ring = q_vector->rx.ring;
6025 union e1000_adv_rx_desc *rx_desc;
6026 const int current_node = numa_node_id();
6027 unsigned int total_bytes = 0, total_packets = 0;
6028 u16 cleaned_count = igb_desc_unused(rx_ring);
6029 u16 i = rx_ring->next_to_clean;
6031 rx_desc = IGB_RX_DESC(rx_ring, i);
6033 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6034 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6035 struct sk_buff *skb = buffer_info->skb;
6036 union e1000_adv_rx_desc *next_rxd;
6038 buffer_info->skb = NULL;
6039 prefetch(skb->data);
6041 i++;
6042 if (i == rx_ring->count)
6043 i = 0;
6045 next_rxd = IGB_RX_DESC(rx_ring, i);
6046 prefetch(next_rxd);
6049 * This memory barrier is needed to keep us from reading
6050 * any other fields out of the rx_desc until we know the
6051 * RXD_STAT_DD bit is set
6053 rmb();
6055 if (!skb_is_nonlinear(skb)) {
6056 __skb_put(skb, igb_get_hlen(rx_desc));
6057 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6058 IGB_RX_HDR_LEN,
6059 DMA_FROM_DEVICE);
6060 buffer_info->dma = 0;
6063 if (rx_desc->wb.upper.length) {
6064 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6066 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6067 buffer_info->page,
6068 buffer_info->page_offset,
6069 length);
6071 skb->len += length;
6072 skb->data_len += length;
6073 skb->truesize += PAGE_SIZE / 2;
6075 if ((page_count(buffer_info->page) != 1) ||
6076 (page_to_nid(buffer_info->page) != current_node))
6077 buffer_info->page = NULL;
6078 else
6079 get_page(buffer_info->page);
6081 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6082 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6083 buffer_info->page_dma = 0;
6086 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6087 struct igb_rx_buffer *next_buffer;
6088 next_buffer = &rx_ring->rx_buffer_info[i];
6089 buffer_info->skb = next_buffer->skb;
6090 buffer_info->dma = next_buffer->dma;
6091 next_buffer->skb = skb;
6092 next_buffer->dma = 0;
6093 goto next_desc;
6096 if (igb_test_staterr(rx_desc,
6097 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6098 dev_kfree_skb_any(skb);
6099 goto next_desc;
6102 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6103 igb_rx_hash(rx_ring, rx_desc, skb);
6104 igb_rx_checksum(rx_ring, rx_desc, skb);
6105 igb_rx_vlan(rx_ring, rx_desc, skb);
6107 total_bytes += skb->len;
6108 total_packets++;
6110 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6112 napi_gro_receive(&q_vector->napi, skb);
6114 budget--;
6115 next_desc:
6116 if (!budget)
6117 break;
6119 cleaned_count++;
6120 /* return some buffers to hardware, one at a time is too slow */
6121 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6122 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6123 cleaned_count = 0;
6126 /* use prefetched values */
6127 rx_desc = next_rxd;
6130 rx_ring->next_to_clean = i;
6131 u64_stats_update_begin(&rx_ring->rx_syncp);
6132 rx_ring->rx_stats.packets += total_packets;
6133 rx_ring->rx_stats.bytes += total_bytes;
6134 u64_stats_update_end(&rx_ring->rx_syncp);
6135 q_vector->rx.total_packets += total_packets;
6136 q_vector->rx.total_bytes += total_bytes;
6138 if (cleaned_count)
6139 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6141 return !!budget;
6144 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6145 struct igb_rx_buffer *bi)
6147 struct sk_buff *skb = bi->skb;
6148 dma_addr_t dma = bi->dma;
6150 if (dma)
6151 return true;
6153 if (likely(!skb)) {
6154 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6155 IGB_RX_HDR_LEN);
6156 bi->skb = skb;
6157 if (!skb) {
6158 rx_ring->rx_stats.alloc_failed++;
6159 return false;
6162 /* initialize skb for ring */
6163 skb_record_rx_queue(skb, rx_ring->queue_index);
6166 dma = dma_map_single(rx_ring->dev, skb->data,
6167 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6169 if (dma_mapping_error(rx_ring->dev, dma)) {
6170 rx_ring->rx_stats.alloc_failed++;
6171 return false;
6174 bi->dma = dma;
6175 return true;
6178 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6179 struct igb_rx_buffer *bi)
6181 struct page *page = bi->page;
6182 dma_addr_t page_dma = bi->page_dma;
6183 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6185 if (page_dma)
6186 return true;
6188 if (!page) {
6189 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6190 bi->page = page;
6191 if (unlikely(!page)) {
6192 rx_ring->rx_stats.alloc_failed++;
6193 return false;
6197 page_dma = dma_map_page(rx_ring->dev, page,
6198 page_offset, PAGE_SIZE / 2,
6199 DMA_FROM_DEVICE);
6201 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6202 rx_ring->rx_stats.alloc_failed++;
6203 return false;
6206 bi->page_dma = page_dma;
6207 bi->page_offset = page_offset;
6208 return true;
6212 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6213 * @rx_ring: Rx descriptor ring to refill
 * @cleaned_count: number of buffers to allocate
6215 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6217 union e1000_adv_rx_desc *rx_desc;
6218 struct igb_rx_buffer *bi;
6219 u16 i = rx_ring->next_to_use;
6221 rx_desc = IGB_RX_DESC(rx_ring, i);
6222 bi = &rx_ring->rx_buffer_info[i];
6223 i -= rx_ring->count;
6225 while (cleaned_count--) {
6226 if (!igb_alloc_mapped_skb(rx_ring, bi))
6227 break;
6229 /* Refresh the desc even if buffer_addrs didn't change
6230 * because each write-back erases this info. */
6231 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6233 if (!igb_alloc_mapped_page(rx_ring, bi))
6234 break;
6236 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6238 rx_desc++;
6239 bi++;
6240 i++;
6241 if (unlikely(!i)) {
6242 rx_desc = IGB_RX_DESC(rx_ring, 0);
6243 bi = rx_ring->rx_buffer_info;
6244 i -= rx_ring->count;
6247 /* clear the hdr_addr for the next_to_use descriptor */
6248 rx_desc->read.hdr_addr = 0;
6251 i += rx_ring->count;
6253 if (rx_ring->next_to_use != i) {
6254 rx_ring->next_to_use = i;
6256 /* Force memory writes to complete before letting h/w
6257 * know there are new descriptors to fetch. (Only
6258 * applicable for weak-ordered memory model archs,
6259 * such as IA-64). */
6260 wmb();
6261 writel(i, rx_ring->tail);
6266 * igb_mii_ioctl - handle MII related ioctls
6267 * @netdev: network interface device structure
6268 * @ifr: pointer to the user ifreq carrying the MII request
6269 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6271 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6273 struct igb_adapter *adapter = netdev_priv(netdev);
6274 struct mii_ioctl_data *data = if_mii(ifr);
6276 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6277 return -EOPNOTSUPP;
6279 switch (cmd) {
6280 case SIOCGMIIPHY:
6281 data->phy_id = adapter->hw.phy.addr;
6282 break;
6283 case SIOCGMIIREG:
6284 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6285 &data->val_out))
6286 return -EIO;
6287 break;
6288 case SIOCSMIIREG:
6289 default:
6290 return -EOPNOTSUPP;
6292 return 0;
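/* Editor's note: a minimal user-space sketch (not driver code) showing how
 * the SIOCGMIIPHY/SIOCGMIIREG path above is typically exercised; the
 * interface name and the choice of MII_BMSR are illustrative assumptions.
 * SIOCGMIIPHY fills mii->phy_id, after which SIOCGMIIREG returns the
 * requested register in mii->val_out.
 *
 *	#include <linux/mii.h>
 *	#include <linux/sockios.h>
 *	#include <net/if.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int read_bmsr(const char *ifname, unsigned int *bmsr)
 *	{
 *		struct ifreq ifr;
 *		struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *		int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
 *		ret = ioctl(fd, SIOCGMIIPHY, &ifr);
 *		if (!ret) {
 *			mii->reg_num = MII_BMSR;
 *			ret = ioctl(fd, SIOCGMIIREG, &ifr);
 *		}
 *		if (!ret)
 *			*bmsr = mii->val_out;
 *		close(fd);
 *		return ret;
 *	}
 */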
6296 * igb_hwtstamp_ioctl - control hardware time stamping
6297 * @netdev: network interface device structure
6298 * @ifr: pointer to the user ifreq carrying a struct hwtstamp_config
6299 * @cmd: ioctl command (SIOCSHWTSTAMP)
6301 * Outgoing time stamping can be enabled and disabled. Play nice and
6302 * disable it when requested, although it shouldn't cause any overhead
6303 * when no packet needs it. At most one packet in the queue may be
6304 * marked for time stamping, otherwise it would be impossible to tell
6305 * for sure to which packet the hardware time stamp belongs.
6307 * Incoming time stamping has to be configured via the hardware
6308 * filters. Not all combinations are supported, in particular event
6309 * type has to be specified. Matching the kind of event packet is
6310 * not supported, with the exception of "all V2 events regardless of
6311 * level 2 or 4".
6314 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6315 struct ifreq *ifr, int cmd)
6317 struct igb_adapter *adapter = netdev_priv(netdev);
6318 struct e1000_hw *hw = &adapter->hw;
6319 struct hwtstamp_config config;
6320 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6321 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6322 u32 tsync_rx_cfg = 0;
6323 bool is_l4 = false;
6324 bool is_l2 = false;
6325 u32 regval;
6327 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6328 return -EFAULT;
6330 /* reserved for future extensions */
6331 if (config.flags)
6332 return -EINVAL;
6334 switch (config.tx_type) {
6335 case HWTSTAMP_TX_OFF:
6336 tsync_tx_ctl = 0;
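/* fall through - TX_OFF only clears the enable bit, then shares TX_ON's break */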
6337 case HWTSTAMP_TX_ON:
6338 break;
6339 default:
6340 return -ERANGE;
6343 switch (config.rx_filter) {
6344 case HWTSTAMP_FILTER_NONE:
6345 tsync_rx_ctl = 0;
6346 break;
6347 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6348 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6349 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6350 case HWTSTAMP_FILTER_ALL:
6352 * register TSYNCRXCFG must be set, therefore it is not
6353 * possible to time stamp both Sync and Delay_Req messages
6354 * => fall back to time stamping all packets
6356 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6357 config.rx_filter = HWTSTAMP_FILTER_ALL;
6358 break;
6359 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6360 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6361 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6362 is_l4 = true;
6363 break;
6364 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6365 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6366 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6367 is_l4 = true;
6368 break;
6369 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6370 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6371 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6372 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6373 is_l2 = true;
6374 is_l4 = true;
6375 config.rx_filter = HWTSTAMP_FILTER_SOME;
6376 break;
6377 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6378 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6379 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6380 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6381 is_l2 = true;
6382 is_l4 = true;
6383 config.rx_filter = HWTSTAMP_FILTER_SOME;
6384 break;
6385 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6386 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6387 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6388 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6389 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6390 is_l2 = true;
6391 is_l4 = true;
6392 break;
6393 default:
6394 return -ERANGE;
6397 if (hw->mac.type == e1000_82575) {
6398 if (tsync_rx_ctl | tsync_tx_ctl)
6399 return -EINVAL;
6400 return 0;
6404 * Per-packet timestamping only works if all packets are
6405 * timestamped, so enable timestamping in all packets as
6406 * long as one rx filter was configured.
6408 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6409 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6410 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6413 /* enable/disable TX */
6414 regval = rd32(E1000_TSYNCTXCTL);
6415 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6416 regval |= tsync_tx_ctl;
6417 wr32(E1000_TSYNCTXCTL, regval);
6419 /* enable/disable RX */
6420 regval = rd32(E1000_TSYNCRXCTL);
6421 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6422 regval |= tsync_rx_ctl;
6423 wr32(E1000_TSYNCRXCTL, regval);
6425 /* define which PTP packets are time stamped */
6426 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6428 /* define ethertype filter for timestamped packets */
6429 if (is_l2)
6430 wr32(E1000_ETQF(3),
6431 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6432 E1000_ETQF_1588 | /* enable timestamping */
6433 ETH_P_1588)); /* 1588 eth protocol type */
6434 else
6435 wr32(E1000_ETQF(3), 0);
6437 #define PTP_PORT 319
6438 /* L4 Queue Filter[3]: filter by destination port and protocol */
6439 if (is_l4) {
6440 u32 ftqf = (IPPROTO_UDP /* UDP */
6441 | E1000_FTQF_VF_BP /* VF not compared */
6442 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6443 | E1000_FTQF_MASK); /* mask all inputs */
6444 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6446 wr32(E1000_IMIR(3), htons(PTP_PORT));
6447 wr32(E1000_IMIREXT(3),
6448 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6449 if (hw->mac.type == e1000_82576) {
6450 /* enable source port check */
6451 wr32(E1000_SPQF(3), htons(PTP_PORT));
6452 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6454 wr32(E1000_FTQF(3), ftqf);
6455 } else {
6456 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6458 wrfl();
6460 adapter->hwtstamp_config = config;
6462 /* clear TX/RX time stamp registers, just to be sure */
6463 regval = rd32(E1000_TXSTMPH);
6464 regval = rd32(E1000_RXSTMPH);
6466 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6467 -EFAULT : 0;
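/* Editor's note: a hedged user-space sketch (not driver code) of how the
 * SIOCSHWTSTAMP handler above is normally driven; the interface name and
 * the chosen rx_filter are illustrative assumptions.
 *
 *	#include <linux/net_tstamp.h>
 *	#include <linux/sockios.h>
 *	#include <net/if.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int enable_ptp_timestamping(const char *ifname)
 *	{
 *		struct hwtstamp_config cfg;
 *		struct ifreq ifr;
 *		int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&cfg, 0, sizeof(cfg));
 *		cfg.tx_type = HWTSTAMP_TX_ON;
 *		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
 *		ifr.ifr_data = (char *)&cfg;
 *		ret = ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *		close(fd);
 *		return ret;
 *	}
 *
 * On success the driver may have rewritten cfg.rx_filter (for instance to
 * HWTSTAMP_FILTER_ALL or HWTSTAMP_FILTER_SOME above) before copying the
 * config back, to report what was actually programmed.
 */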
6471 * igb_ioctl - dispatch device-specific ioctls
6472 * @netdev: network interface device structure
6473 * @ifr: pointer to the user ifreq
6474 * @cmd: ioctl command
6476 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6478 switch (cmd) {
6479 case SIOCGMIIPHY:
6480 case SIOCGMIIREG:
6481 case SIOCSMIIREG:
6482 return igb_mii_ioctl(netdev, ifr, cmd);
6483 case SIOCSHWTSTAMP:
6484 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6485 default:
6486 return -EOPNOTSUPP;
6490 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6492 struct igb_adapter *adapter = hw->back;
6493 u16 cap_offset;
6495 cap_offset = adapter->pdev->pcie_cap;
6496 if (!cap_offset)
6497 return -E1000_ERR_CONFIG;
6499 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6501 return 0;
6504 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6506 struct igb_adapter *adapter = hw->back;
6507 u16 cap_offset;
6509 cap_offset = adapter->pdev->pcie_cap;
6510 if (!cap_offset)
6511 return -E1000_ERR_CONFIG;
6513 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6515 return 0;
6518 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6520 struct igb_adapter *adapter = netdev_priv(netdev);
6521 struct e1000_hw *hw = &adapter->hw;
6522 u32 ctrl, rctl;
6523 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6525 if (enable) {
6526 /* enable VLAN tag insert/strip */
6527 ctrl = rd32(E1000_CTRL);
6528 ctrl |= E1000_CTRL_VME;
6529 wr32(E1000_CTRL, ctrl);
6531 /* Disable CFI check */
6532 rctl = rd32(E1000_RCTL);
6533 rctl &= ~E1000_RCTL_CFIEN;
6534 wr32(E1000_RCTL, rctl);
6535 } else {
6536 /* disable VLAN tag insert/strip */
6537 ctrl = rd32(E1000_CTRL);
6538 ctrl &= ~E1000_CTRL_VME;
6539 wr32(E1000_CTRL, ctrl);
6542 igb_rlpml_set(adapter);
6545 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6547 struct igb_adapter *adapter = netdev_priv(netdev);
6548 struct e1000_hw *hw = &adapter->hw;
6549 int pf_id = adapter->vfs_allocated_count;
6551 /* attempt to add filter to vlvf array */
6552 igb_vlvf_set(adapter, vid, true, pf_id);
6554 /* add the filter since PF can receive vlans w/o entry in vlvf */
6555 igb_vfta_set(hw, vid, true);
6557 set_bit(vid, adapter->active_vlans);
6559 return 0;
6562 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6564 struct igb_adapter *adapter = netdev_priv(netdev);
6565 struct e1000_hw *hw = &adapter->hw;
6566 int pf_id = adapter->vfs_allocated_count;
6567 s32 err;
6569 /* remove vlan from VLVF table array */
6570 err = igb_vlvf_set(adapter, vid, false, pf_id);
6572 /* if vid was not present in VLVF just remove it from table */
6573 if (err)
6574 igb_vfta_set(hw, vid, false);
6576 clear_bit(vid, adapter->active_vlans);
6578 return 0;
6581 static void igb_restore_vlan(struct igb_adapter *adapter)
6583 u16 vid;
6585 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6587 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6588 igb_vlan_rx_add_vid(adapter->netdev, vid);
6591 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6593 struct pci_dev *pdev = adapter->pdev;
6594 struct e1000_mac_info *mac = &adapter->hw.mac;
6596 mac->autoneg = 0;
6598 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6599 * for the switch() below to work */
6600 if ((spd & 1) || (dplx & ~1))
6601 goto err_inval;
6603 /* Fiber NICs only allow 1000 Mbps full duplex */
6604 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6605 (spd != SPEED_1000 ||
6606 dplx != DUPLEX_FULL))
6607 goto err_inval;
6609 switch (spd + dplx) {
6610 case SPEED_10 + DUPLEX_HALF:
6611 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6612 break;
6613 case SPEED_10 + DUPLEX_FULL:
6614 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6615 break;
6616 case SPEED_100 + DUPLEX_HALF:
6617 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6618 break;
6619 case SPEED_100 + DUPLEX_FULL:
6620 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6621 break;
6622 case SPEED_1000 + DUPLEX_FULL:
6623 mac->autoneg = 1;
6624 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6625 break;
6626 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6627 default:
6628 goto err_inval;
6630 return 0;
6632 err_inval:
6633 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6634 return -EINVAL;
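/* Editor's note: with SPEED_10 == 10, SPEED_100 == 100, SPEED_1000 == 1000,
 * DUPLEX_HALF == 0 and DUPLEX_FULL == 1, the spd + dplx sums in the switch
 * above are unambiguous: 10, 11, 100, 101 and 1001. For example a request
 * for SPEED_100 + DUPLEX_FULL (100 + 1 = 101) selects ADVERTISE_100_FULL. */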
6637 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6638 bool runtime)
6640 struct net_device *netdev = pci_get_drvdata(pdev);
6641 struct igb_adapter *adapter = netdev_priv(netdev);
6642 struct e1000_hw *hw = &adapter->hw;
6643 u32 ctrl, rctl, status;
6644 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6645 #ifdef CONFIG_PM
6646 int retval = 0;
6647 #endif
6649 netif_device_detach(netdev);
6651 if (netif_running(netdev))
6652 __igb_close(netdev, true);
6654 igb_clear_interrupt_scheme(adapter);
6656 #ifdef CONFIG_PM
6657 retval = pci_save_state(pdev);
6658 if (retval)
6659 return retval;
6660 #endif
6662 status = rd32(E1000_STATUS);
6663 if (status & E1000_STATUS_LU)
6664 wufc &= ~E1000_WUFC_LNKC;
6666 if (wufc) {
6667 igb_setup_rctl(adapter);
6668 igb_set_rx_mode(netdev);
6670 /* turn on all-multi mode if wake on multicast is enabled */
6671 if (wufc & E1000_WUFC_MC) {
6672 rctl = rd32(E1000_RCTL);
6673 rctl |= E1000_RCTL_MPE;
6674 wr32(E1000_RCTL, rctl);
6677 ctrl = rd32(E1000_CTRL);
6678 /* advertise wake from D3Cold */
6679 #define E1000_CTRL_ADVD3WUC 0x00100000
6680 /* phy power management enable */
6681 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6682 ctrl |= E1000_CTRL_ADVD3WUC;
6683 wr32(E1000_CTRL, ctrl);
6685 /* Allow time for pending master requests to run */
6686 igb_disable_pcie_master(hw);
6688 wr32(E1000_WUC, E1000_WUC_PME_EN);
6689 wr32(E1000_WUFC, wufc);
6690 } else {
6691 wr32(E1000_WUC, 0);
6692 wr32(E1000_WUFC, 0);
6695 *enable_wake = wufc || adapter->en_mng_pt;
6696 if (!*enable_wake)
6697 igb_power_down_link(adapter);
6698 else
6699 igb_power_up_link(adapter);
6701 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6702 * would have already happened in close and is redundant. */
6703 igb_release_hw_control(adapter);
6705 pci_disable_device(pdev);
6707 return 0;
6710 #ifdef CONFIG_PM
6711 static int igb_suspend(struct device *dev)
6713 int retval;
6714 bool wake;
6715 struct pci_dev *pdev = to_pci_dev(dev);
6717 retval = __igb_shutdown(pdev, &wake, 0);
6718 if (retval)
6719 return retval;
6721 if (wake) {
6722 pci_prepare_to_sleep(pdev);
6723 } else {
6724 pci_wake_from_d3(pdev, false);
6725 pci_set_power_state(pdev, PCI_D3hot);
6728 return 0;
6731 static int igb_resume(struct device *dev)
6733 struct pci_dev *pdev = to_pci_dev(dev);
6734 struct net_device *netdev = pci_get_drvdata(pdev);
6735 struct igb_adapter *adapter = netdev_priv(netdev);
6736 struct e1000_hw *hw = &adapter->hw;
6737 int err;
6739 pci_set_power_state(pdev, PCI_D0);
6740 pci_restore_state(pdev);
6741 pci_save_state(pdev);
6743 err = pci_enable_device_mem(pdev);
6744 if (err) {
6745 dev_err(&pdev->dev,
6746 "igb: Cannot enable PCI device from suspend\n");
6747 return err;
6749 pci_set_master(pdev);
6751 pci_enable_wake(pdev, PCI_D3hot, 0);
6752 pci_enable_wake(pdev, PCI_D3cold, 0);
6754 if (!rtnl_is_locked()) {
6756 * shut up ASSERT_RTNL() warning in
6757 * netif_set_real_num_tx/rx_queues.
6759 rtnl_lock();
6760 err = igb_init_interrupt_scheme(adapter);
6761 rtnl_unlock();
6762 } else {
6763 err = igb_init_interrupt_scheme(adapter);
6765 if (err) {
6766 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6767 return -ENOMEM;
6770 igb_reset(adapter);
6772 /* let the f/w know that the h/w is now under the control of the
6773 * driver. */
6774 igb_get_hw_control(adapter);
6776 wr32(E1000_WUS, ~0);
6778 if (netdev->flags & IFF_UP) {
6779 err = __igb_open(netdev, true);
6780 if (err)
6781 return err;
6784 netif_device_attach(netdev);
6785 return 0;
6788 #ifdef CONFIG_PM_RUNTIME
6789 static int igb_runtime_idle(struct device *dev)
6791 struct pci_dev *pdev = to_pci_dev(dev);
6792 struct net_device *netdev = pci_get_drvdata(pdev);
6793 struct igb_adapter *adapter = netdev_priv(netdev);
6795 if (!igb_has_link(adapter))
6796 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6798 return -EBUSY;
6801 static int igb_runtime_suspend(struct device *dev)
6803 struct pci_dev *pdev = to_pci_dev(dev);
6804 int retval;
6805 bool wake;
6807 retval = __igb_shutdown(pdev, &wake, 1);
6808 if (retval)
6809 return retval;
6811 if (wake) {
6812 pci_prepare_to_sleep(pdev);
6813 } else {
6814 pci_wake_from_d3(pdev, false);
6815 pci_set_power_state(pdev, PCI_D3hot);
6818 return 0;
6821 static int igb_runtime_resume(struct device *dev)
6823 return igb_resume(dev);
6825 #endif /* CONFIG_PM_RUNTIME */
6826 #endif
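/* Editor's note: a minimal sketch, under the assumption that the driver's
 * actual dev_pm_ops definition (not shown in this excerpt) ties the
 * callbacks above together roughly as follows:
 *
 *	static const struct dev_pm_ops igb_pm_ops = {
 *		SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
 *		SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
 *				   igb_runtime_idle)
 *	};
 *
 * With the PCI driver's .driver.pm pointing at such an object, the PM core
 * calls igb_runtime_idle() once the device becomes unused and brackets the
 * D3hot transitions with igb_runtime_suspend()/igb_runtime_resume().
 */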
6828 static void igb_shutdown(struct pci_dev *pdev)
6830 bool wake;
6832 __igb_shutdown(pdev, &wake, 0);
6834 if (system_state == SYSTEM_POWER_OFF) {
6835 pci_wake_from_d3(pdev, wake);
6836 pci_set_power_state(pdev, PCI_D3hot);
6840 #ifdef CONFIG_NET_POLL_CONTROLLER
6842 * Polling 'interrupt' - used by things like netconsole to send skbs
6843 * without having to re-enable interrupts. It's not called while
6844 * the interrupt routine is executing.
6846 static void igb_netpoll(struct net_device *netdev)
6848 struct igb_adapter *adapter = netdev_priv(netdev);
6849 struct e1000_hw *hw = &adapter->hw;
6850 struct igb_q_vector *q_vector;
6851 int i;
6853 for (i = 0; i < adapter->num_q_vectors; i++) {
6854 q_vector = adapter->q_vector[i];
6855 if (adapter->msix_entries)
6856 wr32(E1000_EIMC, q_vector->eims_value);
6857 else
6858 igb_irq_disable(adapter);
6859 napi_schedule(&q_vector->napi);
6862 #endif /* CONFIG_NET_POLL_CONTROLLER */
6865 * igb_io_error_detected - called when PCI error is detected
6866 * @pdev: Pointer to PCI device
6867 * @state: The current pci connection state
6869 * This function is called after a PCI bus error affecting
6870 * this device has been detected.
6872 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6873 pci_channel_state_t state)
6875 struct net_device *netdev = pci_get_drvdata(pdev);
6876 struct igb_adapter *adapter = netdev_priv(netdev);
6878 netif_device_detach(netdev);
6880 if (state == pci_channel_io_perm_failure)
6881 return PCI_ERS_RESULT_DISCONNECT;
6883 if (netif_running(netdev))
6884 igb_down(adapter);
6885 pci_disable_device(pdev);
6887 /* Request a slot reset. */
6888 return PCI_ERS_RESULT_NEED_RESET;
6892 * igb_io_slot_reset - called after the pci bus has been reset.
6893 * @pdev: Pointer to PCI device
6895 * Restart the card from scratch, as if from a cold-boot. Implementation
6896 * resembles the first-half of the igb_resume routine.
6898 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6900 struct net_device *netdev = pci_get_drvdata(pdev);
6901 struct igb_adapter *adapter = netdev_priv(netdev);
6902 struct e1000_hw *hw = &adapter->hw;
6903 pci_ers_result_t result;
6904 int err;
6906 if (pci_enable_device_mem(pdev)) {
6907 dev_err(&pdev->dev,
6908 "Cannot re-enable PCI device after reset.\n");
6909 result = PCI_ERS_RESULT_DISCONNECT;
6910 } else {
6911 pci_set_master(pdev);
6912 pci_restore_state(pdev);
6913 pci_save_state(pdev);
6915 pci_enable_wake(pdev, PCI_D3hot, 0);
6916 pci_enable_wake(pdev, PCI_D3cold, 0);
6918 igb_reset(adapter);
6919 wr32(E1000_WUS, ~0);
6920 result = PCI_ERS_RESULT_RECOVERED;
6923 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6924 if (err) {
6925 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6926 "failed 0x%0x\n", err);
6927 /* non-fatal, continue */
6930 return result;
6934 * igb_io_resume - called when traffic can start flowing again.
6935 * @pdev: Pointer to PCI device
6937 * This callback is called when the error recovery driver tells us that
6938 * it's OK to resume normal operation. Implementation resembles the
6939 * second-half of the igb_resume routine.
6941 static void igb_io_resume(struct pci_dev *pdev)
6943 struct net_device *netdev = pci_get_drvdata(pdev);
6944 struct igb_adapter *adapter = netdev_priv(netdev);
6946 if (netif_running(netdev)) {
6947 if (igb_up(adapter)) {
6948 dev_err(&pdev->dev, "igb_up failed after reset\n");
6949 return;
6953 netif_device_attach(netdev);
6955 /* let the f/w know that the h/w is now under the control of the
6956 * driver. */
6957 igb_get_hw_control(adapter);
6960 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6961 u8 qsel)
6963 u32 rar_low, rar_high;
6964 struct e1000_hw *hw = &adapter->hw;
6966 /* HW expects these in little endian so we reverse the byte order
6967 * from network order (big endian) to little endian
6969 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6970 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6971 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6973 /* Indicate to hardware the Address is Valid. */
6974 rar_high |= E1000_RAH_AV;
6976 if (hw->mac.type == e1000_82575)
6977 rar_high |= E1000_RAH_POOL_1 * qsel;
6978 else
6979 rar_high |= E1000_RAH_POOL_1 << qsel;
6981 wr32(E1000_RAL(index), rar_low);
6982 wrfl();
6983 wr32(E1000_RAH(index), rar_high);
6984 wrfl();
6987 static int igb_set_vf_mac(struct igb_adapter *adapter,
6988 int vf, unsigned char *mac_addr)
6990 struct e1000_hw *hw = &adapter->hw;
6991 /* VF MAC addresses start at the end of the receive addresses and move
6992 * towards the first; as a result a collision should not be possible */
6993 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6995 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6997 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6999 return 0;
7002 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7004 struct igb_adapter *adapter = netdev_priv(netdev);
7005 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7006 return -EINVAL;
7007 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7008 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7009 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7010 " change effective.\n");
7011 if (test_bit(__IGB_DOWN, &adapter->state)) {
7012 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7013 " but the PF device is not up.\n");
7014 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7015 " attempting to use the VF device.\n");
7017 return igb_set_vf_mac(adapter, vf, mac);
7020 static int igb_link_mbps(int internal_link_speed)
7022 switch (internal_link_speed) {
7023 case SPEED_100:
7024 return 100;
7025 case SPEED_1000:
7026 return 1000;
7027 default:
7028 return 0;
7032 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7033 int link_speed)
7035 int rf_dec, rf_int;
7036 u32 bcnrc_val;
7038 if (tx_rate != 0) {
7039 /* Calculate the rate factor values to set */
7040 rf_int = link_speed / tx_rate;
7041 rf_dec = (link_speed - (rf_int * tx_rate));
7042 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7044 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7045 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7046 E1000_RTTBCNRC_RF_INT_MASK);
7047 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7048 } else {
7049 bcnrc_val = 0;
7052 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7053 wr32(E1000_RTTBCNRC, bcnrc_val);
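/* Editor's note: worked example of the rate-factor math above. On a
 * 1000 Mbps link with tx_rate = 300 Mbps, rf_int = 1000 / 300 = 3 and
 * rf_dec = (1000 - 3 * 300) * 2^E1000_RTTBCNRC_RF_INT_SHIFT / 300, i.e.
 * the fractional remainder in fixed point, so RTTBCNRC is programmed with
 * a rate factor of roughly 3.33 = link_speed / tx_rate. */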
7056 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7058 int actual_link_speed, i;
7059 bool reset_rate = false;
7061 /* VF TX rate limit was not set or not supported */
7062 if ((adapter->vf_rate_link_speed == 0) ||
7063 (adapter->hw.mac.type != e1000_82576))
7064 return;
7066 actual_link_speed = igb_link_mbps(adapter->link_speed);
7067 if (actual_link_speed != adapter->vf_rate_link_speed) {
7068 reset_rate = true;
7069 adapter->vf_rate_link_speed = 0;
7070 dev_info(&adapter->pdev->dev,
7071 "Link speed has been changed. VF Transmit "
7072 "rate is disabled\n");
7075 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7076 if (reset_rate)
7077 adapter->vf_data[i].tx_rate = 0;
7079 igb_set_vf_rate_limit(&adapter->hw, i,
7080 adapter->vf_data[i].tx_rate,
7081 actual_link_speed);
7085 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7087 struct igb_adapter *adapter = netdev_priv(netdev);
7088 struct e1000_hw *hw = &adapter->hw;
7089 int actual_link_speed;
7091 if (hw->mac.type != e1000_82576)
7092 return -EOPNOTSUPP;
7094 actual_link_speed = igb_link_mbps(adapter->link_speed);
7095 if ((vf >= adapter->vfs_allocated_count) ||
7096 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7097 (tx_rate < 0) || (tx_rate > actual_link_speed))
7098 return -EINVAL;
7100 adapter->vf_rate_link_speed = actual_link_speed;
7101 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7102 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7104 return 0;
7107 static int igb_ndo_get_vf_config(struct net_device *netdev,
7108 int vf, struct ifla_vf_info *ivi)
7110 struct igb_adapter *adapter = netdev_priv(netdev);
7111 if (vf >= adapter->vfs_allocated_count)
7112 return -EINVAL;
7113 ivi->vf = vf;
7114 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7115 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7116 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7117 ivi->qos = adapter->vf_data[vf].pf_qos;
7118 return 0;
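/* Editor's note: the igb_ndo_set_vf_mac(), igb_ndo_set_vf_bw() and
 * igb_ndo_get_vf_config() hooks above are normally reached from iproute2;
 * the interface name and VF index below are illustrative assumptions:
 *
 *	ip link set eth0 vf 0 mac 02:01:02:03:04:05
 *	ip link set eth0 vf 0 rate 500
 *	ip link show eth0
 *
 * the last command reporting the per-VF MAC and rate via
 * igb_ndo_get_vf_config().
 */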
7121 static void igb_vmm_control(struct igb_adapter *adapter)
7123 struct e1000_hw *hw = &adapter->hw;
7124 u32 reg;
7126 switch (hw->mac.type) {
7127 case e1000_82575:
7128 default:
7129 /* replication is not supported for 82575 */
7130 return;
7131 case e1000_82576:
7132 /* notify HW that the MAC is adding vlan tags */
7133 reg = rd32(E1000_DTXCTL);
7134 reg |= E1000_DTXCTL_VLAN_ADDED;
7135 wr32(E1000_DTXCTL, reg);
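/* fall through - 82576 also needs the 82580 replication setting below */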
7136 case e1000_82580:
7137 /* enable replication vlan tag stripping */
7138 reg = rd32(E1000_RPLOLR);
7139 reg |= E1000_RPLOLR_STRVLAN;
7140 wr32(E1000_RPLOLR, reg);
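/* fall through */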
7141 case e1000_i350:
7142 /* none of the above registers are supported by i350 */
7143 break;
7146 if (adapter->vfs_allocated_count) {
7147 igb_vmdq_set_loopback_pf(hw, true);
7148 igb_vmdq_set_replication_pf(hw, true);
7149 igb_vmdq_set_anti_spoofing_pf(hw, true,
7150 adapter->vfs_allocated_count);
7151 } else {
7152 igb_vmdq_set_loopback_pf(hw, false);
7153 igb_vmdq_set_replication_pf(hw, false);
7157 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7159 struct e1000_hw *hw = &adapter->hw;
7160 u32 dmac_thr;
7161 u16 hwm;
7163 if (hw->mac.type > e1000_82580) {
7164 if (adapter->flags & IGB_FLAG_DMAC) {
7165 u32 reg;
7167 /* force threshold to 0. */
7168 wr32(E1000_DMCTXTH, 0);
7171 * DMA Coalescing high water mark needs to be greater
7172 * than the Rx threshold. Set hwm to PBA - max frame
7173 * size in 16B units, but no lower than PBA - 6KB.
7175 hwm = 64 * pba - adapter->max_frame_size / 16;
7176 if (hwm < 64 * (pba - 6))
7177 hwm = 64 * (pba - 6);
7178 reg = rd32(E1000_FCRTC);
7179 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7180 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7181 & E1000_FCRTC_RTH_COAL_MASK);
7182 wr32(E1000_FCRTC, reg);
7185 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7186 * frame size, but no lower than PBA - 10KB.
7188 dmac_thr = pba - adapter->max_frame_size / 512;
7189 if (dmac_thr < pba - 10)
7190 dmac_thr = pba - 10;
7191 reg = rd32(E1000_DMACR);
7192 reg &= ~E1000_DMACR_DMACTHR_MASK;
7193 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7194 & E1000_DMACR_DMACTHR_MASK);
7196 /* transition to L0s or L1 if available */
7197 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7199 /* watchdog timer = ~1000 usec, expressed in 32 usec units */
7200 reg |= (1000 >> 5);
7201 wr32(E1000_DMACR, reg);
7204 * no lower threshold to disable
7205 * coalescing (smart FIFO) - UTRESH=0
7207 wr32(E1000_DMCRTRH, 0);
7209 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7211 wr32(E1000_DMCTLX, reg);
7214 * free space in tx packet buffer to wake from
7215 * DMA coalescing
7217 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7218 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7221 * make low power state decision controlled
7222 * by DMA coalescing
7224 reg = rd32(E1000_PCIEMISC);
7225 reg &= ~E1000_PCIEMISC_LX_DECISION;
7226 wr32(E1000_PCIEMISC, reg);
7227 } /* endif adapter->dmac is not disabled */
7228 } else if (hw->mac.type == e1000_82580) {
7229 u32 reg = rd32(E1000_PCIEMISC);
7230 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7231 wr32(E1000_DMACR, 0);
7235 /* igb_main.c */