igb: Move DMA Coalescing init code to separate function.
drivers/net/ethernet/intel/igb/igb_main.c
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define MAJ 3
#define MIN 2
#define BUILD 10
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
__stringify(BUILD) "-k"
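/* DRV_VERSION expands to "3.2.10-k": the MAJ.MIN.BUILD numbers above,
 * stringified, with the "-k" suffix distinguishing this in-kernel driver
 * from Intel's out-of-tree releases.
 */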
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	/* Power Management Hooks */
	.suspend  = igb_suspend,
	.resume   = igb_resume,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		printk(KERN_INFO "%-15s %08x\n",
			reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	printk(KERN_INFO "%-15s ", rname);
	for (n = 0; n < 4; n++)
		printk(KERN_CONT "%08x ", regs[n]);
	printk(KERN_CONT "\n");
}
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	u16 i, n;

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		printk(KERN_INFO "Device Name     state            "
			"trans_start      last_rx\n");
		printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
			netdev->name,
			netdev->state,
			netdev->trans_start,
			netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	printk(KERN_INFO " Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
		" leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		struct igb_tx_buffer *buffer_info;
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
		printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
			   (u64)buffer_info->dma,
			   buffer_info->length,
			   buffer_info->next_to_watch,
			   (u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31  24             15     0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "T [desc]     [address 63:0  ] "
			"[PlPOCIStDDM Ln] [bi->dma       ] "
			"leng ntw timestamp bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			struct igb_tx_buffer *buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->tx_buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
				" %04X %p %016llX %p", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb);
			if (i == tx_ring->next_to_use &&
				i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC/U\n");
			else if (i == tx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == tx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	printk(KERN_INFO "Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO " %5d %5X %5X\n", n,
			   rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
	 *   | Checksum   Ident  |   |           |    | Type | Type |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		printk(KERN_INFO "------------------------------------\n");
		printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
			"[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
			"<-- Adv Rx Read format\n");
		printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
			"[vl er S cks ln] ---------------- [bi->skb] "
			"<-- Adv Rx Write-Back format\n");

		for (i = 0; i < rx_ring->count; i++) {
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				printk(KERN_INFO "RWB[0x%03X]     %016llX "
					"%016llX ---------------- %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb);
			} else {
				printk(KERN_INFO "R  [0x%03X]     %016llX "
					"%016llX %016llX %p", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}

			if (i == rx_ring->next_to_use)
				printk(KERN_CONT " NTU\n");
			else if (i == rx_ring->next_to_clean)
				printk(KERN_CONT " NTC\n");
			else
				printk(KERN_CONT "\n");
		}
	}

exit:
	return;
}
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 **/
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
	struct igb_adapter *adapter =
		container_of(tc, struct igb_adapter, cycles);
	struct e1000_hw *hw = &adapter->hw;
	u64 stamp = 0;
	int shift = 0;

	/*
	 * The timestamp latches on lowest register read. For the 82580
	 * the lowest register is SYSTIMR instead of SYSTIML.  However we never
	 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
	 */
	if (hw->mac.type >= e1000_82580) {
		stamp = rd32(E1000_SYSTIMR) >> 8;
		shift = IGB_82580_TSYNC_SHIFT;
	}

	stamp |= (u64)rd32(E1000_SYSTIML) << shift;
	stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
	return stamp;
}
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	printk(KERN_INFO "%s - version %s\n",
	       igb_driver_string, igb_driver_version);

	printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
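/* Worked example of the interleave above: Q_IDX_82576(0) = 0,
 * Q_IDX_82576(1) = 8, Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, ...
 * i.e. even indices map to queue registers 0..7 and odd indices to
 * 8..15, matching the VF pairing described in igb_cache_ring_register
 * (VF 0 owns queues 0 and 8, VF 1 owns 1 and 9, and so on).
 */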
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
				                               Q_IDX_82576(i);
		}
		/* fall through: any remaining queues map sequentially below */
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;
	int orig_node = adapter->node;
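	/* When no NUMA node is pinned (adapter->node == -1), the loops
	 * below walk the online nodes round-robin so ring memory is spread
	 * across nodes; adapter->node is restored to orig_node afterwards.
	 */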
	for (i = 0; i < adapter->num_tx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
		adapter->tx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

		/* On i350, loopback VLAN packets have the tag byte-swapped. */
		if (adapter->hw.mac.type == e1000_i350)
			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

		adapter->rx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	igb_cache_ring_register(adapter);

	return 0;

err:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_queues(adapter);

	return -ENOMEM;
}
/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset in IVAR, should be multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

	array_wr32(E1000_IVAR0, index, ivar);
}
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
	u32 msixbm = 0;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/*
		 * 82576 uses a table that essentially consists of 2 columns
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
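		/* Example of the 82576 column-major layout: rx_queue 10 maps
		 * to row 10 & 0x7 = 2 and byte offset (10 & 0x8) << 1 = 16,
		 * i.e. the third byte of IVAR0[2]; its Tx counterpart lands
		 * 8 bits higher at offset 24.
		 */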
	case e1000_82580:
	case e1000_i350:
		/*
		 * On 82580 and newer adapters the scheme is similar to 82576
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
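		/* Example of the row-major layout: rx_queue 5 maps to row
		 * 5 >> 1 = 2 and byte offset (5 & 0x1) << 4 = 16, so each
		 * IVAR row packs queues 2n and 2n + 1 as Rx/Tx byte pairs.
		 */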
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
		           E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
	                  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
			        q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
			        q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
			        q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
		                  igb_msix_ring, 0, q_vector->name,
		                  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
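	/* e.g. with 4 RSS queues and unpaired Tx handlers this requests
	 * 4 Rx + 4 Tx + 1 link vector = 9 MSI-X vectors; with queue
	 * pairing enabled it would be 4 + 1 = 5.
	 */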
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);
	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	return netif_set_real_num_rx_queues(adapter->netdev,
					    adapter->num_rx_queues);
}
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
	int orig_node = adapter->node;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	return 0;

err_out:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
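/* In both helpers above, itr_setting values 1-3 are treated as mode
 * selectors for dynamic interrupt moderation rather than literal EITR
 * intervals, so the initial interval falls back to IGB_START_ITR.
 */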
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
			        "Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	}

	igb_assign_vector(adapter->q_vector[0], 0);

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++)
			free_irq(adapter->msix_entries[vector++].vector,
			         adapter->q_vector[i]);
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}
/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 **/
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_disable(&(adapter->q_vector[i]->napi));

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset */
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
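	/* Worked example, assuming the 82575 default 34KB PBA and a
	 * 1522-byte max frame: hwm = min(34816 * 9 / 10, 34816 - 3044)
	 * = min(31334, 31772) = 31334, so high_water = 31334 & 0xFFF0
	 * = 31328 bytes and low_water = 31312.
	 */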
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	igb_init_dmac(adapter, pba);
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static u32 igb_fix_features(struct net_device *netdev, u32 features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}
static int igb_set_features(struct net_device *netdev, u32 features)
{
	u32 changed = netdev->features ^ features;

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	return 0;
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
		     pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;
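	/* Prefer a 64-bit DMA mask and fall back to 32-bit; pci_using_dac
	 * records whether the 64-bit mask stuck so HIGHDMA can be
	 * advertised on the netdev features later in this function.
	 */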
1832 pci_using_dac = 0;
1833 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1834 if (!err) {
1835 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1836 if (!err)
1837 pci_using_dac = 1;
1838 } else {
1839 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1840 if (err) {
1841 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1842 if (err) {
1843 dev_err(&pdev->dev, "No usable DMA "
1844 "configuration, aborting\n");
1845 goto err_dma;
1850 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1851 IORESOURCE_MEM),
1852 igb_driver_name);
1853 if (err)
1854 goto err_pci_reg;
1856 pci_enable_pcie_error_reporting(pdev);
1858 pci_set_master(pdev);
1859 pci_save_state(pdev);
1861 err = -ENOMEM;
1862 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1863 IGB_MAX_TX_QUEUES);
1864 if (!netdev)
1865 goto err_alloc_etherdev;
1867 SET_NETDEV_DEV(netdev, &pdev->dev);
1869 pci_set_drvdata(pdev, netdev);
1870 adapter = netdev_priv(netdev);
1871 adapter->netdev = netdev;
1872 adapter->pdev = pdev;
1873 hw = &adapter->hw;
1874 hw->back = adapter;
1875 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1877 mmio_start = pci_resource_start(pdev, 0);
1878 mmio_len = pci_resource_len(pdev, 0);
1880 err = -EIO;
1881 hw->hw_addr = ioremap(mmio_start, mmio_len);
1882 if (!hw->hw_addr)
1883 goto err_ioremap;
1885 netdev->netdev_ops = &igb_netdev_ops;
1886 igb_set_ethtool_ops(netdev);
1887 netdev->watchdog_timeo = 5 * HZ;
1889 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1891 netdev->mem_start = mmio_start;
1892 netdev->mem_end = mmio_start + mmio_len;
1894 /* PCI config space info */
1895 hw->vendor_id = pdev->vendor;
1896 hw->device_id = pdev->device;
1897 hw->revision_id = pdev->revision;
1898 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1899 hw->subsystem_device_id = pdev->subsystem_device;
1901 /* Copy the default MAC, PHY and NVM function pointers */
1902 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1903 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1904 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1905 /* Initialize skew-specific constants */
1906 err = ei->get_invariants(hw);
1907 if (err)
1908 goto err_sw_init;
1910 /* setup the private structure */
1911 err = igb_sw_init(adapter);
1912 if (err)
1913 goto err_sw_init;
1915 igb_get_bus_info_pcie(hw);
1917 hw->phy.autoneg_wait_to_complete = false;
1919 /* Copper options */
1920 if (hw->phy.media_type == e1000_media_type_copper) {
1921 hw->phy.mdix = AUTO_ALL_MODES;
1922 hw->phy.disable_polarity_correction = false;
1923 hw->phy.ms_type = e1000_ms_hw_default;
1926 if (igb_check_reset_block(hw))
1927 dev_info(&pdev->dev,
1928 "PHY reset is blocked due to SOL/IDER session.\n");
1931 * features is initialized to 0 at allocation; it might already have
1932 * bits set by igb_sw_init, so we should use an OR instead of an
1933 * assignment.
1935 netdev->features |= NETIF_F_SG |
1936 NETIF_F_IP_CSUM |
1937 NETIF_F_IPV6_CSUM |
1938 NETIF_F_TSO |
1939 NETIF_F_TSO6 |
1940 NETIF_F_RXHASH |
1941 NETIF_F_RXCSUM |
1942 NETIF_F_HW_VLAN_RX |
1943 NETIF_F_HW_VLAN_TX;
1945 /* copy netdev features into list of user selectable features */
1946 netdev->hw_features |= netdev->features;
1948 /* set this bit last since it cannot be part of hw_features */
1949 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1951 netdev->vlan_features |= NETIF_F_TSO |
1952 NETIF_F_TSO6 |
1953 NETIF_F_IP_CSUM |
1954 NETIF_F_IPV6_CSUM |
1955 NETIF_F_SG;
1957 if (pci_using_dac) {
1958 netdev->features |= NETIF_F_HIGHDMA;
1959 netdev->vlan_features |= NETIF_F_HIGHDMA;
1962 if (hw->mac.type >= e1000_82576) {
1963 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1964 netdev->features |= NETIF_F_SCTP_CSUM;
1967 netdev->priv_flags |= IFF_UNICAST_FLT;
1969 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1971 /* before reading the NVM, reset the controller to put the device in a
1972 * known good starting state */
1973 hw->mac.ops.reset_hw(hw);
1975 /* make sure the NVM is good */
1976 if (hw->nvm.ops.validate(hw) < 0) {
1977 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1978 err = -EIO;
1979 goto err_eeprom;
1982 /* copy the MAC address out of the NVM */
1983 if (hw->mac.ops.read_mac_addr(hw))
1984 dev_err(&pdev->dev, "NVM Read Error\n");
1986 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1987 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1989 if (!is_valid_ether_addr(netdev->perm_addr)) {
1990 dev_err(&pdev->dev, "Invalid MAC Address\n");
1991 err = -EIO;
1992 goto err_eeprom;
1995 setup_timer(&adapter->watchdog_timer, igb_watchdog,
1996 (unsigned long) adapter);
1997 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1998 (unsigned long) adapter);
2000 INIT_WORK(&adapter->reset_task, igb_reset_task);
2001 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2003 /* Initialize link properties that are user-changeable */
2004 adapter->fc_autoneg = true;
2005 hw->mac.autoneg = true;
2006 hw->phy.autoneg_advertised = 0x2f;
2008 hw->fc.requested_mode = e1000_fc_default;
2009 hw->fc.current_mode = e1000_fc_default;
2011 igb_validate_mdi_setting(hw);
2013 /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2014 * enable the ACPI Magic Packet filter
2017 if (hw->bus.func == 0)
2018 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2019 else if (hw->mac.type >= e1000_82580)
2020 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2021 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2022 &eeprom_data);
2023 else if (hw->bus.func == 1)
2024 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2026 if (eeprom_data & eeprom_apme_mask)
2027 adapter->eeprom_wol |= E1000_WUFC_MAG;
2029 /* now that we have the eeprom settings, apply the special cases where
2030 * the eeprom may be wrong or the board simply won't support wake on
2031 * lan on a particular port */
2032 switch (pdev->device) {
2033 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2034 adapter->eeprom_wol = 0;
2035 break;
2036 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2037 case E1000_DEV_ID_82576_FIBER:
2038 case E1000_DEV_ID_82576_SERDES:
2039 /* Wake events only supported on port A for dual fiber
2040 * regardless of eeprom setting */
2041 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2042 adapter->eeprom_wol = 0;
2043 break;
2044 case E1000_DEV_ID_82576_QUAD_COPPER:
2045 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2046 /* if quad port adapter, disable WoL on all but port A */
2047 if (global_quad_port_a != 0)
2048 adapter->eeprom_wol = 0;
2049 else
2050 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2051 /* Reset for multiple quad port adapters */
2052 if (++global_quad_port_a == 4)
2053 global_quad_port_a = 0;
2054 break;
2057 /* initialize the wol settings based on the eeprom settings */
2058 adapter->wol = adapter->eeprom_wol;
2059 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2061 /* reset the hardware with the new settings */
2062 igb_reset(adapter);
2064 /* let the f/w know that the h/w is now under the control of the
2065 * driver. */
2066 igb_get_hw_control(adapter);
2068 strcpy(netdev->name, "eth%d");
2069 err = register_netdev(netdev);
2070 if (err)
2071 goto err_register;
2073 /* carrier off reporting is important to ethtool even BEFORE open */
2074 netif_carrier_off(netdev);
2076 #ifdef CONFIG_IGB_DCA
2077 if (dca_add_requester(&pdev->dev) == 0) {
2078 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2079 dev_info(&pdev->dev, "DCA enabled\n");
2080 igb_setup_dca(adapter);
2083 #endif
2084 /* do hw tstamp init after resetting */
2085 igb_init_hw_timer(adapter);
2087 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2088 /* print bus type/speed/width info */
2089 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2090 netdev->name,
2091 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2092 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2093 "unknown"),
2094 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2095 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2096 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2097 "unknown"),
2098 netdev->dev_addr);
2100 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2101 if (ret_val)
2102 strcpy(part_str, "Unknown");
2103 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2104 dev_info(&pdev->dev,
2105 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2106 adapter->msix_entries ? "MSI-X" :
2107 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2108 adapter->num_rx_queues, adapter->num_tx_queues);
2109 switch (hw->mac.type) {
2110 case e1000_i350:
2111 igb_set_eee_i350(hw);
2112 break;
2113 default:
2114 break;
2116 return 0;
2118 err_register:
2119 igb_release_hw_control(adapter);
2120 err_eeprom:
2121 if (!igb_check_reset_block(hw))
2122 igb_reset_phy(hw);
2124 if (hw->flash_address)
2125 iounmap(hw->flash_address);
2126 err_sw_init:
2127 igb_clear_interrupt_scheme(adapter);
2128 iounmap(hw->hw_addr);
2129 err_ioremap:
2130 free_netdev(netdev);
2131 err_alloc_etherdev:
2132 pci_release_selected_regions(pdev,
2133 pci_select_bars(pdev, IORESOURCE_MEM));
2134 err_pci_reg:
2135 err_dma:
2136 pci_disable_device(pdev);
2137 return err;
2141 * igb_remove - Device Removal Routine
2142 * @pdev: PCI device information struct
2144 * igb_remove is called by the PCI subsystem to alert the driver
2145 * that it should release a PCI device. This could be caused by a
2146 * Hot-Plug event, or because the driver is going to be removed from
2147 * memory.
2149 static void __devexit igb_remove(struct pci_dev *pdev)
2151 struct net_device *netdev = pci_get_drvdata(pdev);
2152 struct igb_adapter *adapter = netdev_priv(netdev);
2153 struct e1000_hw *hw = &adapter->hw;
2156 * The watchdog timer may be rescheduled, so explicitly
2157 * disable watchdog from being rescheduled.
2159 set_bit(__IGB_DOWN, &adapter->state);
2160 del_timer_sync(&adapter->watchdog_timer);
2161 del_timer_sync(&adapter->phy_info_timer);
2163 cancel_work_sync(&adapter->reset_task);
2164 cancel_work_sync(&adapter->watchdog_task);
2166 #ifdef CONFIG_IGB_DCA
2167 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2168 dev_info(&pdev->dev, "DCA disabled\n");
2169 dca_remove_requester(&pdev->dev);
2170 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2171 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2173 #endif
2175 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2176 * would have already happened in close and is redundant. */
2177 igb_release_hw_control(adapter);
2179 unregister_netdev(netdev);
2181 igb_clear_interrupt_scheme(adapter);
2183 #ifdef CONFIG_PCI_IOV
2184 /* reclaim resources allocated to VFs */
2185 if (adapter->vf_data) {
2186 /* disable iov and allow time for transactions to clear */
2187 if (!igb_check_vf_assignment(adapter)) {
2188 pci_disable_sriov(pdev);
2189 msleep(500);
2190 } else {
2191 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2194 kfree(adapter->vf_data);
2195 adapter->vf_data = NULL;
2196 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2197 wrfl();
2198 msleep(100);
2199 dev_info(&pdev->dev, "IOV Disabled\n");
2201 #endif
2203 iounmap(hw->hw_addr);
2204 if (hw->flash_address)
2205 iounmap(hw->flash_address);
2206 pci_release_selected_regions(pdev,
2207 pci_select_bars(pdev, IORESOURCE_MEM));
2209 free_netdev(netdev);
2211 pci_disable_pcie_error_reporting(pdev);
2213 pci_disable_device(pdev);
2217 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2218 * @adapter: board private structure to initialize
2220 * This function initializes the vf specific data storage and then attempts to
2221 * allocate the VFs. The reason for ordering it this way is that it is much
2222 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2223 * the memory for the VFs.
2225 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2227 #ifdef CONFIG_PCI_IOV
2228 struct pci_dev *pdev = adapter->pdev;
2229 int old_vfs = igb_find_enabled_vfs(adapter);
2230 int i;
2232 if (old_vfs) {
2233 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2234 "max_vfs setting of %d\n", old_vfs, max_vfs);
2235 adapter->vfs_allocated_count = old_vfs;
2238 if (!adapter->vfs_allocated_count)
2239 return;
2241 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2242 sizeof(struct vf_data_storage), GFP_KERNEL);
2243 /* if allocation failed then we do not support SR-IOV */
2244 if (!adapter->vf_data) {
2245 adapter->vfs_allocated_count = 0;
2246 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2247 "Data Storage\n");
2248 goto out;
2251 if (!old_vfs) {
2252 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2253 goto err_out;
2255 dev_info(&pdev->dev, "%d VFs allocated\n",
2256 adapter->vfs_allocated_count);
2257 for (i = 0; i < adapter->vfs_allocated_count; i++)
2258 igb_vf_configure(adapter, i);
2260 /* DMA Coalescing is not supported in IOV mode. */
2261 adapter->flags &= ~IGB_FLAG_DMAC;
2262 goto out;
2263 err_out:
2264 kfree(adapter->vf_data);
2265 adapter->vf_data = NULL;
2266 adapter->vfs_allocated_count = 0;
2267 out:
2268 return;
2269 #endif /* CONFIG_PCI_IOV */
2273 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2274 * @adapter: board private structure to initialize
2276 * igb_init_hw_timer initializes the function pointer and values for the hw
2277 * timer found in hardware.
2279 static void igb_init_hw_timer(struct igb_adapter *adapter)
2281 struct e1000_hw *hw = &adapter->hw;
2283 switch (hw->mac.type) {
2284 case e1000_i350:
2285 case e1000_82580:
2286 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2287 adapter->cycles.read = igb_read_clock;
2288 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2289 adapter->cycles.mult = 1;
2291 * The 82580 timesync advances the system timer by 8ns every 8ns
2292 * and the value cannot be shifted. Instead we need to shift
2293 * the registers to generate a 64bit timer value. As a result
2294 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2295 * 24 in order to generate a larger value for synchronization.
2297 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
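/* Illustrative arithmetic, not driver code: the generic timecounter
 * converts cycle deltas as ns = (delta * mult) >> shift.  Assuming
 * IGB_82580_TSYNC_SHIFT is 24 and with mult set to 1 above, a SYSTIM
 * value returned by the read callback shifted left by 24 gives
 *
 *     ns = ((systim << 24) * 1) >> 24 = systim
 *
 * so the nanosecond count is preserved while the low 24 bits provide
 * headroom for fine-grained clock adjustment.
 */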
2298 /* disable system timer temporarily by setting bit 31 */
2299 wr32(E1000_TSAUXC, 0x80000000);
2300 wrfl();
2302 /* Set registers so that rollover occurs soon to test this. */
2303 wr32(E1000_SYSTIMR, 0x00000000);
2304 wr32(E1000_SYSTIML, 0x80000000);
2305 wr32(E1000_SYSTIMH, 0x000000FF);
2306 wrfl();
2308 /* enable system timer by clearing bit 31 */
2309 wr32(E1000_TSAUXC, 0x0);
2310 wrfl();
2312 timecounter_init(&adapter->clock,
2313 &adapter->cycles,
2314 ktime_to_ns(ktime_get_real()));
2316 * Synchronize our NIC clock against system wall clock. NIC
2317 * time stamp reading requires ~3us per sample, each sample
2318 * was pretty stable even under load => only require 10
2319 * samples for each offset comparison.
2321 memset(&adapter->compare, 0, sizeof(adapter->compare));
2322 adapter->compare.source = &adapter->clock;
2323 adapter->compare.target = ktime_get_real;
2324 adapter->compare.num_samples = 10;
2325 timecompare_update(&adapter->compare, 0);
2326 break;
2327 case e1000_82576:
2329 * Initialize hardware timer: we keep it running just in case
2330 * that some program needs it later on.
2332 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2333 adapter->cycles.read = igb_read_clock;
2334 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2335 adapter->cycles.mult = 1;
2337 * Scale the NIC clock cycle by a large factor so that
2338 * relatively small clock corrections can be added or
2339 * subtracted at each clock tick. The drawbacks of a large
2340 * factor are a) that the clock register overflows more quickly
2341 * (not such a big deal) and b) that the increment per tick has
2342 * to fit into 24 bits. As a result we need to use a shift of
2343 * 19 so we can fit a value of 16 into the TIMINCA register.
2345 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2346 wr32(E1000_TIMINCA,
2347 (1 << E1000_TIMINCA_16NS_SHIFT) |
2348 (16 << IGB_82576_TSYNC_SHIFT));
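/* Worked example, for illustration only: with the 16ns clock period
 * described above and the shift of 19, the scaled per-tick increment
 * written to TIMINCA is
 *
 *     16 << 19 = 8388608 (0x800000)
 *
 * which fits comfortably in a 24-bit field (max 16777215) while
 * leaving 2^19 fractional steps per tick for fine corrections.
 */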
2350 /* Set registers so that rollover occurs soon to test this. */
2351 wr32(E1000_SYSTIML, 0x00000000);
2352 wr32(E1000_SYSTIMH, 0xFF800000);
2353 wrfl();
2355 timecounter_init(&adapter->clock,
2356 &adapter->cycles,
2357 ktime_to_ns(ktime_get_real()));
2359 * Synchronize our NIC clock against system wall clock. NIC
2360 * time stamp reading requires ~3us per sample, each sample
2361 * was pretty stable even under load => only require 10
2362 * samples for each offset comparison.
2364 memset(&adapter->compare, 0, sizeof(adapter->compare));
2365 adapter->compare.source = &adapter->clock;
2366 adapter->compare.target = ktime_get_real;
2367 adapter->compare.num_samples = 10;
2368 timecompare_update(&adapter->compare, 0);
2369 break;
2370 case e1000_82575:
2371 /* 82575 does not support timesync */
2372 default:
2373 break;
2379 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2380 * @adapter: board private structure to initialize
2382 * igb_sw_init initializes the Adapter private data structure.
2383 * Fields are initialized based on PCI device information and
2384 * OS network device settings (MTU size).
2386 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2388 struct e1000_hw *hw = &adapter->hw;
2389 struct net_device *netdev = adapter->netdev;
2390 struct pci_dev *pdev = adapter->pdev;
2392 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2394 /* set default ring sizes */
2395 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2396 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2398 /* set default ITR values */
2399 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2400 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2402 /* set default work limits */
2403 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2405 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2406 VLAN_HLEN;
2407 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2409 adapter->node = -1;
2411 spin_lock_init(&adapter->stats64_lock);
2412 #ifdef CONFIG_PCI_IOV
2413 switch (hw->mac.type) {
2414 case e1000_82576:
2415 case e1000_i350:
2416 if (max_vfs > 7) {
2417 dev_warn(&pdev->dev,
2418 "Maximum of 7 VFs per PF, using max\n");
2419 adapter->vfs_allocated_count = 7;
2420 } else
2421 adapter->vfs_allocated_count = max_vfs;
2422 break;
2423 default:
2424 break;
2426 #endif /* CONFIG_PCI_IOV */
2427 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2428 /* i350 cannot do RSS and SR-IOV at the same time */
2429 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2430 adapter->rss_queues = 1;
2433 * if rss_queues > 4, or if more than 6 VFs are allocated while more
2434 * than one rss_queue is in use, combine the queues into queue pairs
2435 * in order to conserve the limited supply of interrupt vectors
2437 if ((adapter->rss_queues > 4) ||
2438 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2439 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
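/* For example: with rss_queues == 8 and no VFs, pairing each Tx ring
 * with an Rx ring on a shared vector needs roughly 8 queue interrupts
 * plus one for link and other causes, instead of 17 unpaired ones,
 * which keeps the request within the MSI-X vectors the part provides.
 */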
2441 /* This call may decrease the number of queues */
2442 if (igb_init_interrupt_scheme(adapter)) {
2443 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2444 return -ENOMEM;
2447 igb_probe_vfs(adapter);
2449 /* Explicitly disable IRQ since the NIC can be in any state. */
2450 igb_irq_disable(adapter);
2452 if (hw->mac.type == e1000_i350)
2453 adapter->flags &= ~IGB_FLAG_DMAC;
2455 set_bit(__IGB_DOWN, &adapter->state);
2456 return 0;
2460 * igb_open - Called when a network interface is made active
2461 * @netdev: network interface device structure
2463 * Returns 0 on success, negative value on failure
2465 * The open entry point is called when a network interface is made
2466 * active by the system (IFF_UP). At this point all resources needed
2467 * for transmit and receive operations are allocated, the interrupt
2468 * handler is registered with the OS, the watchdog timer is started,
2469 * and the stack is notified that the interface is ready.
2471 static int igb_open(struct net_device *netdev)
2473 struct igb_adapter *adapter = netdev_priv(netdev);
2474 struct e1000_hw *hw = &adapter->hw;
2475 int err;
2476 int i;
2478 /* disallow open during test */
2479 if (test_bit(__IGB_TESTING, &adapter->state))
2480 return -EBUSY;
2482 netif_carrier_off(netdev);
2484 /* allocate transmit descriptors */
2485 err = igb_setup_all_tx_resources(adapter);
2486 if (err)
2487 goto err_setup_tx;
2489 /* allocate receive descriptors */
2490 err = igb_setup_all_rx_resources(adapter);
2491 if (err)
2492 goto err_setup_rx;
2494 igb_power_up_link(adapter);
2496 /* before we allocate an interrupt, we must be ready to handle it.
2497 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2498 * as soon as we call pci_request_irq, so we have to setup our
2499 * clean_rx handler before we do so. */
2500 igb_configure(adapter);
2502 err = igb_request_irq(adapter);
2503 if (err)
2504 goto err_req_irq;
2506 /* From here on the code is the same as igb_up() */
2507 clear_bit(__IGB_DOWN, &adapter->state);
2509 for (i = 0; i < adapter->num_q_vectors; i++)
2510 napi_enable(&(adapter->q_vector[i]->napi));
2512 /* Clear any pending interrupts. */
2513 rd32(E1000_ICR);
2515 igb_irq_enable(adapter);
2517 /* notify VFs that reset has been completed */
2518 if (adapter->vfs_allocated_count) {
2519 u32 reg_data = rd32(E1000_CTRL_EXT);
2520 reg_data |= E1000_CTRL_EXT_PFRSTD;
2521 wr32(E1000_CTRL_EXT, reg_data);
2524 netif_tx_start_all_queues(netdev);
2526 /* start the watchdog. */
2527 hw->mac.get_link_status = 1;
2528 schedule_work(&adapter->watchdog_task);
2530 return 0;
2532 err_req_irq:
2533 igb_release_hw_control(adapter);
2534 igb_power_down_link(adapter);
2535 igb_free_all_rx_resources(adapter);
2536 err_setup_rx:
2537 igb_free_all_tx_resources(adapter);
2538 err_setup_tx:
2539 igb_reset(adapter);
2541 return err;
2545 * igb_close - Disables a network interface
2546 * @netdev: network interface device structure
2548 * Returns 0, this is not allowed to fail
2550 * The close entry point is called when an interface is de-activated
2551 * by the OS. The hardware is still under the driver's control, but
2552 * needs to be disabled. A global MAC reset is issued to stop the
2553 * hardware, and all transmit and receive resources are freed.
2555 static int igb_close(struct net_device *netdev)
2557 struct igb_adapter *adapter = netdev_priv(netdev);
2559 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2560 igb_down(adapter);
2562 igb_free_irq(adapter);
2564 igb_free_all_tx_resources(adapter);
2565 igb_free_all_rx_resources(adapter);
2567 return 0;
2571 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2572 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2574 * Return 0 on success, negative on failure
2576 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2578 struct device *dev = tx_ring->dev;
2579 int orig_node = dev_to_node(dev);
2580 int size;
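/* Allocation strategy below: try to place both the buffer_info array
 * and the descriptor ring on the ring's preferred NUMA node first,
 * then fall back to any node.  set_dev_node() is toggled around
 * dma_alloc_coherent() because the coherent allocator takes its node
 * hint from the struct device.
 */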
2582 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2583 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2584 if (!tx_ring->tx_buffer_info)
2585 tx_ring->tx_buffer_info = vzalloc(size);
2586 if (!tx_ring->tx_buffer_info)
2587 goto err;
2589 /* round up to nearest 4K */
2590 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2591 tx_ring->size = ALIGN(tx_ring->size, 4096);
2593 set_dev_node(dev, tx_ring->numa_node);
2594 tx_ring->desc = dma_alloc_coherent(dev,
2595 tx_ring->size,
2596 &tx_ring->dma,
2597 GFP_KERNEL);
2598 set_dev_node(dev, orig_node);
2599 if (!tx_ring->desc)
2600 tx_ring->desc = dma_alloc_coherent(dev,
2601 tx_ring->size,
2602 &tx_ring->dma,
2603 GFP_KERNEL);
2605 if (!tx_ring->desc)
2606 goto err;
2608 tx_ring->next_to_use = 0;
2609 tx_ring->next_to_clean = 0;
2611 return 0;
2613 err:
2614 vfree(tx_ring->tx_buffer_info);
2615 dev_err(dev,
2616 "Unable to allocate memory for the transmit descriptor ring\n");
2617 return -ENOMEM;
2621 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2622 * (Descriptors) for all queues
2623 * @adapter: board private structure
2625 * Return 0 on success, negative on failure
2627 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2629 struct pci_dev *pdev = adapter->pdev;
2630 int i, err = 0;
2632 for (i = 0; i < adapter->num_tx_queues; i++) {
2633 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2634 if (err) {
2635 dev_err(&pdev->dev,
2636 "Allocation for Tx Queue %u failed\n", i);
2637 for (i--; i >= 0; i--)
2638 igb_free_tx_resources(adapter->tx_ring[i]);
2639 break;
2643 return err;
2647 * igb_setup_tctl - configure the transmit control registers
2648 * @adapter: Board private structure
2650 void igb_setup_tctl(struct igb_adapter *adapter)
2652 struct e1000_hw *hw = &adapter->hw;
2653 u32 tctl;
2655 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2656 wr32(E1000_TXDCTL(0), 0);
2658 /* Program the Transmit Control Register */
2659 tctl = rd32(E1000_TCTL);
2660 tctl &= ~E1000_TCTL_CT;
2661 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2662 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2664 igb_config_collision_dist(hw);
2666 /* Enable transmits */
2667 tctl |= E1000_TCTL_EN;
2669 wr32(E1000_TCTL, tctl);
2673 * igb_configure_tx_ring - Configure transmit ring after Reset
2674 * @adapter: board private structure
2675 * @ring: tx ring to configure
2677 * Configure a transmit ring after a reset.
2679 void igb_configure_tx_ring(struct igb_adapter *adapter,
2680 struct igb_ring *ring)
2682 struct e1000_hw *hw = &adapter->hw;
2683 u32 txdctl = 0;
2684 u64 tdba = ring->dma;
2685 int reg_idx = ring->reg_idx;
2687 /* disable the queue */
2688 wr32(E1000_TXDCTL(reg_idx), 0);
2689 wrfl();
2690 mdelay(10);
2692 wr32(E1000_TDLEN(reg_idx),
2693 ring->count * sizeof(union e1000_adv_tx_desc));
2694 wr32(E1000_TDBAL(reg_idx),
2695 tdba & 0x00000000ffffffffULL);
2696 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2698 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2699 wr32(E1000_TDH(reg_idx), 0);
2700 writel(0, ring->tail);
2702 txdctl |= IGB_TX_PTHRESH;
2703 txdctl |= IGB_TX_HTHRESH << 8;
2704 txdctl |= IGB_TX_WTHRESH << 16;
2706 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2707 wr32(E1000_TXDCTL(reg_idx), txdctl);
2711 * igb_configure_tx - Configure transmit Unit after Reset
2712 * @adapter: board private structure
2714 * Configure the Tx unit of the MAC after a reset.
2716 static void igb_configure_tx(struct igb_adapter *adapter)
2718 int i;
2720 for (i = 0; i < adapter->num_tx_queues; i++)
2721 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2725 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2726 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2728 * Returns 0 on success, negative on failure
2730 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2732 struct device *dev = rx_ring->dev;
2733 int orig_node = dev_to_node(dev);
2734 int size, desc_len;
2736 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2737 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2738 if (!rx_ring->rx_buffer_info)
2739 rx_ring->rx_buffer_info = vzalloc(size);
2740 if (!rx_ring->rx_buffer_info)
2741 goto err;
2743 desc_len = sizeof(union e1000_adv_rx_desc);
2745 /* Round up to nearest 4K */
2746 rx_ring->size = rx_ring->count * desc_len;
2747 rx_ring->size = ALIGN(rx_ring->size, 4096);
2749 set_dev_node(dev, rx_ring->numa_node);
2750 rx_ring->desc = dma_alloc_coherent(dev,
2751 rx_ring->size,
2752 &rx_ring->dma,
2753 GFP_KERNEL);
2754 set_dev_node(dev, orig_node);
2755 if (!rx_ring->desc)
2756 rx_ring->desc = dma_alloc_coherent(dev,
2757 rx_ring->size,
2758 &rx_ring->dma,
2759 GFP_KERNEL);
2761 if (!rx_ring->desc)
2762 goto err;
2764 rx_ring->next_to_clean = 0;
2765 rx_ring->next_to_use = 0;
2767 return 0;
2769 err:
2770 vfree(rx_ring->rx_buffer_info);
2771 rx_ring->rx_buffer_info = NULL;
2772 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2773 " ring\n");
2774 return -ENOMEM;
2778 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2779 * (Descriptors) for all queues
2780 * @adapter: board private structure
2782 * Return 0 on success, negative on failure
2784 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2786 struct pci_dev *pdev = adapter->pdev;
2787 int i, err = 0;
2789 for (i = 0; i < adapter->num_rx_queues; i++) {
2790 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2791 if (err) {
2792 dev_err(&pdev->dev,
2793 "Allocation for Rx Queue %u failed\n", i);
2794 for (i--; i >= 0; i--)
2795 igb_free_rx_resources(adapter->rx_ring[i]);
2796 break;
2800 return err;
2804 * igb_setup_mrqc - configure the multiple receive queue control registers
2805 * @adapter: Board private structure
2807 static void igb_setup_mrqc(struct igb_adapter *adapter)
2809 struct e1000_hw *hw = &adapter->hw;
2810 u32 mrqc, rxcsum;
2811 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2812 union e1000_reta {
2813 u32 dword;
2814 u8 bytes[4];
2815 } reta;
2816 static const u8 rsshash[40] = {
2817 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2818 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2819 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2820 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2822 /* Fill out hash function seeds */
2823 for (j = 0; j < 10; j++) {
2824 u32 rsskey = rsshash[(j * 4)];
2825 rsskey |= rsshash[(j * 4) + 1] << 8;
2826 rsskey |= rsshash[(j * 4) + 2] << 16;
2827 rsskey |= rsshash[(j * 4) + 3] << 24;
2828 array_wr32(E1000_RSSRK(0), j, rsskey);
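/* Example, for illustration only: for j == 0 the first four key bytes
 * 0x6d, 0x5a, 0x56, 0xda are packed low byte first, so
 *
 *     rsskey = 0x6d | 0x5a << 8 | 0x56 << 16 | 0xda << 24 = 0xda565a6d
 *
 * is the dword written to RSSRK(0).
 */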
2831 num_rx_queues = adapter->rss_queues;
2833 if (adapter->vfs_allocated_count) {
2834 /* 82575 and 82576 support 2 RSS queues for VMDq */
2835 switch (hw->mac.type) {
2836 case e1000_i350:
2837 case e1000_82580:
2838 num_rx_queues = 1;
2839 shift = 0;
2840 break;
2841 case e1000_82576:
2842 shift = 3;
2843 num_rx_queues = 2;
2844 break;
2845 case e1000_82575:
2846 shift = 2;
2847 shift2 = 6;
2848 default:
2849 break;
2851 } else {
2852 if (hw->mac.type == e1000_82575)
2853 shift = 6;
2856 for (j = 0; j < (32 * 4); j++) {
2857 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2858 if (shift2)
2859 reta.bytes[j & 3] |= num_rx_queues << shift2;
2860 if ((j & 3) == 3)
2861 wr32(E1000_RETA(j >> 2), reta.dword);
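/* Example, for illustration only: in the non-VF case on 82576 or
 * newer (shift == 0, shift2 == 0) with adapter->rss_queues == 4, the
 * loop fills the 128-entry redirection table with the repeating byte
 * pattern 0, 1, 2, 3; on a little-endian machine each dword written
 * to RETA reads back as 0x03020100, spreading flows evenly across
 * the four queues.
 */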
2865 * Disable raw packet checksumming so that RSS hash is placed in
2866 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2867 * offloads as they are enabled by default
2869 rxcsum = rd32(E1000_RXCSUM);
2870 rxcsum |= E1000_RXCSUM_PCSD;
2872 if (adapter->hw.mac.type >= e1000_82576)
2873 /* Enable Receive Checksum Offload for SCTP */
2874 rxcsum |= E1000_RXCSUM_CRCOFL;
2876 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2877 wr32(E1000_RXCSUM, rxcsum);
2879 /* If VMDq is enabled then we set the appropriate mode for that, else
2880 * we default to RSS so that an RSS hash is calculated per packet even
2881 * if we are only using one queue */
2882 if (adapter->vfs_allocated_count) {
2883 if (hw->mac.type > e1000_82575) {
2884 /* Set the default pool for the PF's first queue */
2885 u32 vtctl = rd32(E1000_VT_CTL);
2886 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2887 E1000_VT_CTL_DISABLE_DEF_POOL);
2888 vtctl |= adapter->vfs_allocated_count <<
2889 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2890 wr32(E1000_VT_CTL, vtctl);
2892 if (adapter->rss_queues > 1)
2893 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2894 else
2895 mrqc = E1000_MRQC_ENABLE_VMDQ;
2896 } else {
2897 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2899 igb_vmm_control(adapter);
2902 * Generate RSS hash based on TCP port numbers and/or
2903 * IPv4/v6 src and dst addresses since UDP cannot be
2904 * hashed reliably due to IP fragmentation
2906 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2907 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2908 E1000_MRQC_RSS_FIELD_IPV6 |
2909 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2910 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2912 wr32(E1000_MRQC, mrqc);
2916 * igb_setup_rctl - configure the receive control registers
2917 * @adapter: Board private structure
2919 void igb_setup_rctl(struct igb_adapter *adapter)
2921 struct e1000_hw *hw = &adapter->hw;
2922 u32 rctl;
2924 rctl = rd32(E1000_RCTL);
2926 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2927 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2929 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2930 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2933 * enable stripping of CRC. It's unlikely this will break BMC
2934 * redirection as it did with e1000. Newer features require
2935 * that the HW strips the CRC.
2937 rctl |= E1000_RCTL_SECRC;
2939 /* disable store bad packets and clear size bits. */
2940 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2942 /* enable LPE to prevent packets larger than max_frame_size */
2943 rctl |= E1000_RCTL_LPE;
2945 /* disable queue 0 to prevent tail write w/o re-config */
2946 wr32(E1000_RXDCTL(0), 0);
2948 /* Attention!!! For SR-IOV PF driver operations you must enable
2949 * queue drop for all VF and PF queues to prevent head of line blocking
2950 * if an un-trusted VF does not provide descriptors to hardware.
2952 if (adapter->vfs_allocated_count) {
2953 /* set all queue drop enable bits */
2954 wr32(E1000_QDE, ALL_QUEUES);
2957 wr32(E1000_RCTL, rctl);
2960 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2961 int vfn)
2963 struct e1000_hw *hw = &adapter->hw;
2964 u32 vmolr;
2966 /* if it isn't the PF, check to see if VFs are enabled and
2967 * increase the size to support vlan tags */
2968 if (vfn < adapter->vfs_allocated_count &&
2969 adapter->vf_data[vfn].vlans_enabled)
2970 size += VLAN_TAG_SIZE;
2972 vmolr = rd32(E1000_VMOLR(vfn));
2973 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2974 vmolr |= size | E1000_VMOLR_LPE;
2975 wr32(E1000_VMOLR(vfn), vmolr);
2977 return 0;
2981 * igb_rlpml_set - set maximum receive packet size
2982 * @adapter: board private structure
2984 * Configure maximum receivable packet size.
2986 static void igb_rlpml_set(struct igb_adapter *adapter)
2988 u32 max_frame_size = adapter->max_frame_size;
2989 struct e1000_hw *hw = &adapter->hw;
2990 u16 pf_id = adapter->vfs_allocated_count;
2992 if (pf_id) {
2993 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2995 * If we're in VMDQ or SR-IOV mode, then set global RLPML
2996 * to our max jumbo frame size, in case we need to enable
2997 * jumbo frames on one of the rings later.
2998 * This will not pass over-length frames into the default
2999 * queue because it's gated by the VMOLR.RLPML.
3001 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3004 wr32(E1000_RLPML, max_frame_size);
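/* Example, for illustration only: with SR-IOV active and a 1500-byte
 * MTU, the PF pool stays capped via VMOLR.RLPML at max_frame_size
 * (1500 + 14 + 4 + 4 = 1522 including the VLAN header allowance from
 * igb_sw_init), while the global RLPML is opened to the jumbo maximum
 * for the benefit of other pools.
 */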
3007 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3008 int vfn, bool aupe)
3010 struct e1000_hw *hw = &adapter->hw;
3011 u32 vmolr;
3014 * This register exists only on 82576 and newer, so if we are older
3015 * than that we should exit and do nothing
3017 if (hw->mac.type < e1000_82576)
3018 return;
3020 vmolr = rd32(E1000_VMOLR(vfn));
3021 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3022 if (aupe)
3023 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3024 else
3025 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3027 /* clear all bits that might not be set */
3028 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3030 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3031 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3033 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3034 * multicast packets
3036 if (vfn <= adapter->vfs_allocated_count)
3037 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3039 wr32(E1000_VMOLR(vfn), vmolr);
3043 * igb_configure_rx_ring - Configure a receive ring after Reset
3044 * @adapter: board private structure
3045 * @ring: receive ring to be configured
3047 * Configure the Rx unit of the MAC after a reset.
3049 void igb_configure_rx_ring(struct igb_adapter *adapter,
3050 struct igb_ring *ring)
3052 struct e1000_hw *hw = &adapter->hw;
3053 u64 rdba = ring->dma;
3054 int reg_idx = ring->reg_idx;
3055 u32 srrctl = 0, rxdctl = 0;
3057 /* disable the queue */
3058 wr32(E1000_RXDCTL(reg_idx), 0);
3060 /* Set DMA base address registers */
3061 wr32(E1000_RDBAL(reg_idx),
3062 rdba & 0x00000000ffffffffULL);
3063 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3064 wr32(E1000_RDLEN(reg_idx),
3065 ring->count * sizeof(union e1000_adv_rx_desc));
3067 /* initialize head and tail */
3068 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3069 wr32(E1000_RDH(reg_idx), 0);
3070 writel(0, ring->tail);
3072 /* set descriptor configuration */
3073 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3074 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3075 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3076 #else
3077 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3078 #endif
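/* Example, for illustration only: assuming 4 KiB pages and the usual
 * 1 KiB BSIZEPKT granularity, PAGE_SIZE / 2 = 2048 encodes as
 * 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT = 2, i.e. a 2 KiB packet buffer
 * per half page, while the header buffer is sized separately from
 * IGB_RX_HDR_LEN above.
 */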
3079 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3080 if (hw->mac.type >= e1000_82580)
3081 srrctl |= E1000_SRRCTL_TIMESTAMP;
3082 /* Only set Drop Enable if we are supporting multiple queues */
3083 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3084 srrctl |= E1000_SRRCTL_DROP_EN;
3086 wr32(E1000_SRRCTL(reg_idx), srrctl);
3088 /* set filtering for VMDQ pools */
3089 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3091 rxdctl |= IGB_RX_PTHRESH;
3092 rxdctl |= IGB_RX_HTHRESH << 8;
3093 rxdctl |= IGB_RX_WTHRESH << 16;
3095 /* enable receive descriptor fetching */
3096 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3097 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3101 * igb_configure_rx - Configure receive Unit after Reset
3102 * @adapter: board private structure
3104 * Configure the Rx unit of the MAC after a reset.
3106 static void igb_configure_rx(struct igb_adapter *adapter)
3108 int i;
3110 /* set UTA to appropriate mode */
3111 igb_set_uta(adapter);
3113 /* set the correct pool for the PF default MAC address in entry 0 */
3114 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3115 adapter->vfs_allocated_count);
3117 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3118 * the Base and Length of the Rx Descriptor Ring */
3119 for (i = 0; i < adapter->num_rx_queues; i++)
3120 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3124 * igb_free_tx_resources - Free Tx Resources per Queue
3125 * @tx_ring: Tx descriptor ring for a specific queue
3127 * Free all transmit software resources
3129 void igb_free_tx_resources(struct igb_ring *tx_ring)
3131 igb_clean_tx_ring(tx_ring);
3133 vfree(tx_ring->tx_buffer_info);
3134 tx_ring->tx_buffer_info = NULL;
3136 /* if not set, then don't free */
3137 if (!tx_ring->desc)
3138 return;
3140 dma_free_coherent(tx_ring->dev, tx_ring->size,
3141 tx_ring->desc, tx_ring->dma);
3143 tx_ring->desc = NULL;
3147 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3148 * @adapter: board private structure
3150 * Free all transmit software resources
3152 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3154 int i;
3156 for (i = 0; i < adapter->num_tx_queues; i++)
3157 igb_free_tx_resources(adapter->tx_ring[i]);
3160 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3161 struct igb_tx_buffer *tx_buffer)
3163 if (tx_buffer->skb) {
3164 dev_kfree_skb_any(tx_buffer->skb);
3165 if (tx_buffer->dma)
3166 dma_unmap_single(ring->dev,
3167 tx_buffer->dma,
3168 tx_buffer->length,
3169 DMA_TO_DEVICE);
3170 } else if (tx_buffer->dma) {
3171 dma_unmap_page(ring->dev,
3172 tx_buffer->dma,
3173 tx_buffer->length,
3174 DMA_TO_DEVICE);
3176 tx_buffer->next_to_watch = NULL;
3177 tx_buffer->skb = NULL;
3178 tx_buffer->dma = 0;
3179 /* buffer_info must be completely set up in the transmit path */
3183 * igb_clean_tx_ring - Free Tx Buffers
3184 * @tx_ring: ring to be cleaned
3186 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3188 struct igb_tx_buffer *buffer_info;
3189 unsigned long size;
3190 u16 i;
3192 if (!tx_ring->tx_buffer_info)
3193 return;
3194 /* Free all the Tx ring sk_buffs */
3196 for (i = 0; i < tx_ring->count; i++) {
3197 buffer_info = &tx_ring->tx_buffer_info[i];
3198 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3201 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3202 memset(tx_ring->tx_buffer_info, 0, size);
3204 /* Zero out the descriptor ring */
3205 memset(tx_ring->desc, 0, tx_ring->size);
3207 tx_ring->next_to_use = 0;
3208 tx_ring->next_to_clean = 0;
3212 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3213 * @adapter: board private structure
3215 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3217 int i;
3219 for (i = 0; i < adapter->num_tx_queues; i++)
3220 igb_clean_tx_ring(adapter->tx_ring[i]);
3224 * igb_free_rx_resources - Free Rx Resources
3225 * @rx_ring: ring to clean the resources from
3227 * Free all receive software resources
3229 void igb_free_rx_resources(struct igb_ring *rx_ring)
3231 igb_clean_rx_ring(rx_ring);
3233 vfree(rx_ring->rx_buffer_info);
3234 rx_ring->rx_buffer_info = NULL;
3236 /* if not set, then don't free */
3237 if (!rx_ring->desc)
3238 return;
3240 dma_free_coherent(rx_ring->dev, rx_ring->size,
3241 rx_ring->desc, rx_ring->dma);
3243 rx_ring->desc = NULL;
3247 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3248 * @adapter: board private structure
3250 * Free all receive software resources
3252 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3254 int i;
3256 for (i = 0; i < adapter->num_rx_queues; i++)
3257 igb_free_rx_resources(adapter->rx_ring[i]);
3261 * igb_clean_rx_ring - Free Rx Buffers per Queue
3262 * @rx_ring: ring to free buffers from
3264 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3266 unsigned long size;
3267 u16 i;
3269 if (!rx_ring->rx_buffer_info)
3270 return;
3272 /* Free all the Rx ring sk_buffs */
3273 for (i = 0; i < rx_ring->count; i++) {
3274 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3275 if (buffer_info->dma) {
3276 dma_unmap_single(rx_ring->dev,
3277 buffer_info->dma,
3278 IGB_RX_HDR_LEN,
3279 DMA_FROM_DEVICE);
3280 buffer_info->dma = 0;
3283 if (buffer_info->skb) {
3284 dev_kfree_skb(buffer_info->skb);
3285 buffer_info->skb = NULL;
3287 if (buffer_info->page_dma) {
3288 dma_unmap_page(rx_ring->dev,
3289 buffer_info->page_dma,
3290 PAGE_SIZE / 2,
3291 DMA_FROM_DEVICE);
3292 buffer_info->page_dma = 0;
3294 if (buffer_info->page) {
3295 put_page(buffer_info->page);
3296 buffer_info->page = NULL;
3297 buffer_info->page_offset = 0;
3301 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3302 memset(rx_ring->rx_buffer_info, 0, size);
3304 /* Zero out the descriptor ring */
3305 memset(rx_ring->desc, 0, rx_ring->size);
3307 rx_ring->next_to_clean = 0;
3308 rx_ring->next_to_use = 0;
3312 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3313 * @adapter: board private structure
3315 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3317 int i;
3319 for (i = 0; i < adapter->num_rx_queues; i++)
3320 igb_clean_rx_ring(adapter->rx_ring[i]);
3324 * igb_set_mac - Change the Ethernet Address of the NIC
3325 * @netdev: network interface device structure
3326 * @p: pointer to an address structure
3328 * Returns 0 on success, negative on failure
3330 static int igb_set_mac(struct net_device *netdev, void *p)
3332 struct igb_adapter *adapter = netdev_priv(netdev);
3333 struct e1000_hw *hw = &adapter->hw;
3334 struct sockaddr *addr = p;
3336 if (!is_valid_ether_addr(addr->sa_data))
3337 return -EADDRNOTAVAIL;
3339 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3340 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3342 /* set the correct pool for the new PF MAC address in entry 0 */
3343 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3344 adapter->vfs_allocated_count);
3346 return 0;
3350 * igb_write_mc_addr_list - write multicast addresses to MTA
3351 * @netdev: network interface device structure
3353 * Writes multicast address list to the MTA hash table.
3354 * Returns: -ENOMEM on failure
3355 * 0 on no addresses written
3356 * X on writing X addresses to MTA
3358 static int igb_write_mc_addr_list(struct net_device *netdev)
3360 struct igb_adapter *adapter = netdev_priv(netdev);
3361 struct e1000_hw *hw = &adapter->hw;
3362 struct netdev_hw_addr *ha;
3363 u8 *mta_list;
3364 int i;
3366 if (netdev_mc_empty(netdev)) {
3367 /* nothing to program, so clear mc list */
3368 igb_update_mc_addr_list(hw, NULL, 0);
3369 igb_restore_vf_multicasts(adapter);
3370 return 0;
3373 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3374 if (!mta_list)
3375 return -ENOMEM;
3377 /* The shared function expects a packed array of only addresses. */
3378 i = 0;
3379 netdev_for_each_mc_addr(ha, netdev)
3380 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3382 igb_update_mc_addr_list(hw, mta_list, i);
3383 kfree(mta_list);
3385 return netdev_mc_count(netdev);
3389 * igb_write_uc_addr_list - write unicast addresses to RAR table
3390 * @netdev: network interface device structure
3392 * Writes unicast address list to the RAR table.
3393 * Returns: -ENOMEM on failure/insufficient address space
3394 * 0 on no addresses written
3395 * X on writing X addresses to the RAR table
3397 static int igb_write_uc_addr_list(struct net_device *netdev)
3399 struct igb_adapter *adapter = netdev_priv(netdev);
3400 struct e1000_hw *hw = &adapter->hw;
3401 unsigned int vfn = adapter->vfs_allocated_count;
3402 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3403 int count = 0;
3405 /* return ENOMEM indicating insufficient memory for addresses */
3406 if (netdev_uc_count(netdev) > rar_entries)
3407 return -ENOMEM;
3409 if (!netdev_uc_empty(netdev) && rar_entries) {
3410 struct netdev_hw_addr *ha;
3412 netdev_for_each_uc_addr(ha, netdev) {
3413 if (!rar_entries)
3414 break;
3415 igb_rar_set_qsel(adapter, ha->addr,
3416 rar_entries--,
3417 vfn);
3418 count++;
3421 /* write the addresses in reverse order to avoid write combining */
3422 for (; rar_entries > 0 ; rar_entries--) {
3423 wr32(E1000_RAH(rar_entries), 0);
3424 wr32(E1000_RAL(rar_entries), 0);
3426 wrfl();
3428 return count;
3432 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3433 * @netdev: network interface device structure
3435 * The set_rx_mode entry point is called whenever the unicast or multicast
3436 * address lists or the network interface flags are updated. This routine is
3437 * responsible for configuring the hardware for proper unicast, multicast,
3438 * promiscuous mode, and all-multi behavior.
3440 static void igb_set_rx_mode(struct net_device *netdev)
3442 struct igb_adapter *adapter = netdev_priv(netdev);
3443 struct e1000_hw *hw = &adapter->hw;
3444 unsigned int vfn = adapter->vfs_allocated_count;
3445 u32 rctl, vmolr = 0;
3446 int count;
3448 /* Check for Promiscuous and All Multicast modes */
3449 rctl = rd32(E1000_RCTL);
3451 /* clear the affected bits */
3452 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3454 if (netdev->flags & IFF_PROMISC) {
3455 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3456 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3457 } else {
3458 if (netdev->flags & IFF_ALLMULTI) {
3459 rctl |= E1000_RCTL_MPE;
3460 vmolr |= E1000_VMOLR_MPME;
3461 } else {
3463 * Write addresses to the MTA; if the attempt fails
3464 * then we should just turn on promiscuous mode so
3465 * that we can at least receive multicast traffic
3467 count = igb_write_mc_addr_list(netdev);
3468 if (count < 0) {
3469 rctl |= E1000_RCTL_MPE;
3470 vmolr |= E1000_VMOLR_MPME;
3471 } else if (count) {
3472 vmolr |= E1000_VMOLR_ROMPE;
3476 * Write addresses to available RAR registers; if there is not
3477 * sufficient space to store all the addresses then enable
3478 * unicast promiscuous mode
3480 count = igb_write_uc_addr_list(netdev);
3481 if (count < 0) {
3482 rctl |= E1000_RCTL_UPE;
3483 vmolr |= E1000_VMOLR_ROPE;
3485 rctl |= E1000_RCTL_VFE;
3487 wr32(E1000_RCTL, rctl);
3490 * In order to support SR-IOV and eventually VMDq it is necessary to set
3491 * the VMOLR to enable the appropriate modes. Without this workaround
3492 * we will have issues with VLAN tag stripping not being done for frames
3493 * that are only arriving because we are the default pool
3495 if (hw->mac.type < e1000_82576)
3496 return;
3498 vmolr |= rd32(E1000_VMOLR(vfn)) &
3499 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3500 wr32(E1000_VMOLR(vfn), vmolr);
3501 igb_restore_vf_multicasts(adapter);
3504 static void igb_check_wvbr(struct igb_adapter *adapter)
3506 struct e1000_hw *hw = &adapter->hw;
3507 u32 wvbr = 0;
3509 switch (hw->mac.type) {
3510 case e1000_82576:
3511 case e1000_i350:
3512 if (!(wvbr = rd32(E1000_WVBR)))
3513 return;
3514 break;
3515 default:
3516 break;
3519 adapter->wvbr |= wvbr;
3522 #define IGB_STAGGERED_QUEUE_OFFSET 8
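/* Each VF owns two bits in WVBR: bit n for its first queue and bit
 * (n + IGB_STAGGERED_QUEUE_OFFSET) for its staggered second queue.
 * For example, a spoof event on VF 2 sets bit 2 or bit 10, and
 * igb_spoof_check() below warns once and then clears both bits.
 */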
3524 static void igb_spoof_check(struct igb_adapter *adapter)
3526 int j;
3528 if (!adapter->wvbr)
3529 return;
3531 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3532 if (adapter->wvbr & (1 << j) ||
3533 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3534 dev_warn(&adapter->pdev->dev,
3535 "Spoof event(s) detected on VF %d\n", j);
3536 adapter->wvbr &=
3537 ~((1 << j) |
3538 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3543 /* Need to wait a few seconds after link up to get diagnostic information from
3544 * the phy */
3545 static void igb_update_phy_info(unsigned long data)
3547 struct igb_adapter *adapter = (struct igb_adapter *) data;
3548 igb_get_phy_info(&adapter->hw);
3552 * igb_has_link - check shared code for link and determine up/down
3553 * @adapter: pointer to driver private info
3555 bool igb_has_link(struct igb_adapter *adapter)
3557 struct e1000_hw *hw = &adapter->hw;
3558 bool link_active = false;
3559 s32 ret_val = 0;
3561 /* get_link_status is set on LSC (link status) interrupt or
3562 * rx sequence error interrupt. link_active will stay
3563 * false until check_for_link clears get_link_status again,
3564 * for copper adapters ONLY
3566 switch (hw->phy.media_type) {
3567 case e1000_media_type_copper:
3568 if (hw->mac.get_link_status) {
3569 ret_val = hw->mac.ops.check_for_link(hw);
3570 link_active = !hw->mac.get_link_status;
3571 } else {
3572 link_active = true;
3574 break;
3575 case e1000_media_type_internal_serdes:
3576 ret_val = hw->mac.ops.check_for_link(hw);
3577 link_active = hw->mac.serdes_has_link;
3578 break;
3579 default:
3580 case e1000_media_type_unknown:
3581 break;
3584 return link_active;
3587 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3589 bool ret = false;
3590 u32 ctrl_ext, thstat;
3592 /* check for thermal sensor event on i350, copper only */
3593 if (hw->mac.type == e1000_i350) {
3594 thstat = rd32(E1000_THSTAT);
3595 ctrl_ext = rd32(E1000_CTRL_EXT);
3597 if ((hw->phy.media_type == e1000_media_type_copper) &&
3598 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3599 ret = !!(thstat & event);
3603 return ret;
3607 * igb_watchdog - Timer Call-back
3608 * @data: pointer to adapter cast into an unsigned long
3610 static void igb_watchdog(unsigned long data)
3612 struct igb_adapter *adapter = (struct igb_adapter *)data;
3613 /* Do the rest outside of interrupt context */
3614 schedule_work(&adapter->watchdog_task);
3617 static void igb_watchdog_task(struct work_struct *work)
3619 struct igb_adapter *adapter = container_of(work,
3620 struct igb_adapter,
3621 watchdog_task);
3622 struct e1000_hw *hw = &adapter->hw;
3623 struct net_device *netdev = adapter->netdev;
3624 u32 link;
3625 int i;
3627 link = igb_has_link(adapter);
3628 if (link) {
3629 if (!netif_carrier_ok(netdev)) {
3630 u32 ctrl;
3631 hw->mac.ops.get_speed_and_duplex(hw,
3632 &adapter->link_speed,
3633 &adapter->link_duplex);
3635 ctrl = rd32(E1000_CTRL);
3636 /* Link status message must follow this format */
3637 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3638 "Flow Control: %s\n",
3639 netdev->name,
3640 adapter->link_speed,
3641 adapter->link_duplex == FULL_DUPLEX ?
3642 "Full Duplex" : "Half Duplex",
3643 ((ctrl & E1000_CTRL_TFCE) &&
3644 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3645 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3646 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3648 /* check for thermal sensor event */
3649 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3650 printk(KERN_INFO "igb: %s The network adapter "
3651 "link speed was downshifted "
3652 "because it overheated.\n",
3653 netdev->name);
3656 /* adjust timeout factor according to speed/duplex */
3657 adapter->tx_timeout_factor = 1;
3658 switch (adapter->link_speed) {
3659 case SPEED_10:
3660 adapter->tx_timeout_factor = 14;
3661 break;
3662 case SPEED_100:
3663 /* maybe add some timeout factor ? */
3664 break;
3667 netif_carrier_on(netdev);
3669 igb_ping_all_vfs(adapter);
3670 igb_check_vf_rate_limit(adapter);
3672 /* link state has changed, schedule phy info update */
3673 if (!test_bit(__IGB_DOWN, &adapter->state))
3674 mod_timer(&adapter->phy_info_timer,
3675 round_jiffies(jiffies + 2 * HZ));
3677 } else {
3678 if (netif_carrier_ok(netdev)) {
3679 adapter->link_speed = 0;
3680 adapter->link_duplex = 0;
3682 /* check for thermal sensor event */
3683 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3684 printk(KERN_ERR "igb: %s The network adapter "
3685 "was stopped because it "
3686 "overheated.\n",
3687 netdev->name);
3690 /* Link status message must follow this format */
3691 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3692 netdev->name);
3693 netif_carrier_off(netdev);
3695 igb_ping_all_vfs(adapter);
3697 /* link state has changed, schedule phy info update */
3698 if (!test_bit(__IGB_DOWN, &adapter->state))
3699 mod_timer(&adapter->phy_info_timer,
3700 round_jiffies(jiffies + 2 * HZ));
3704 spin_lock(&adapter->stats64_lock);
3705 igb_update_stats(adapter, &adapter->stats64);
3706 spin_unlock(&adapter->stats64_lock);
3708 for (i = 0; i < adapter->num_tx_queues; i++) {
3709 struct igb_ring *tx_ring = adapter->tx_ring[i];
3710 if (!netif_carrier_ok(netdev)) {
3711 /* We've lost link, so the controller stops DMA,
3712 * but we've got queued Tx work that's never going
3713 * to get done, so reset controller to flush Tx.
3714 * (Do the reset outside of interrupt context). */
3715 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3716 adapter->tx_timeout_count++;
3717 schedule_work(&adapter->reset_task);
3718 /* return immediately since reset is imminent */
3719 return;
3723 /* Force detection of hung controller every watchdog period */
3724 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3727 /* Cause software interrupt to ensure rx ring is cleaned */
3728 if (adapter->msix_entries) {
3729 u32 eics = 0;
3730 for (i = 0; i < adapter->num_q_vectors; i++)
3731 eics |= adapter->q_vector[i]->eims_value;
3732 wr32(E1000_EICS, eics);
3733 } else {
3734 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3737 igb_spoof_check(adapter);
3739 /* Reset the timer */
3740 if (!test_bit(__IGB_DOWN, &adapter->state))
3741 mod_timer(&adapter->watchdog_timer,
3742 round_jiffies(jiffies + 2 * HZ));
3745 enum latency_range {
3746 lowest_latency = 0,
3747 low_latency = 1,
3748 bulk_latency = 2,
3749 latency_invalid = 255
3753 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3755 * Stores a new ITR value based strictly on packet size. This
3756 * algorithm is less sophisticated than that used in igb_update_itr,
3757 * due to the difficulty of synchronizing statistics across multiple
3758 * receive rings. The divisors and thresholds used by this function
3759 * were determined based on theoretical maximum wire speed and testing
3760 * data, in order to minimize response time while increasing bulk
3761 * throughput.
3762 * This functionality is controlled by the InterruptThrottleRate module
3763 * parameter (see igb_param.c)
3764 * NOTE: This function is called only when operating in a multiqueue
3765 * receive environment.
3766 * @q_vector: pointer to q_vector
3768 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3770 int new_val = q_vector->itr_val;
3771 int avg_wire_size = 0;
3772 struct igb_adapter *adapter = q_vector->adapter;
3773 unsigned int packets;
3775 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3776 * ints/sec (IGB_4K_ITR).
3778 if (adapter->link_speed != SPEED_1000) {
3779 new_val = IGB_4K_ITR;
3780 goto set_itr_val;
3783 packets = q_vector->rx.total_packets;
3784 if (packets)
3785 avg_wire_size = q_vector->rx.total_bytes / packets;
3787 packets = q_vector->tx.total_packets;
3788 if (packets)
3789 avg_wire_size = max_t(u32, avg_wire_size,
3790 q_vector->tx.total_bytes / packets);
3792 /* if avg_wire_size isn't set no work was done */
3793 if (!avg_wire_size)
3794 goto clear_counts;
3796 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3797 avg_wire_size += 24;
3799 /* Don't starve jumbo frames */
3800 avg_wire_size = min(avg_wire_size, 3000);
3802 /* Give a little boost to mid-size frames */
3803 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3804 new_val = avg_wire_size / 3;
3805 else
3806 new_val = avg_wire_size / 2;
3808 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3809 if (new_val < IGB_20K_ITR &&
3810 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3811 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3812 new_val = IGB_20K_ITR;
3814 set_itr_val:
3815 if (new_val != q_vector->itr_val) {
3816 q_vector->itr_val = new_val;
3817 q_vector->set_itr = 1;
3819 clear_counts:
3820 q_vector->rx.total_bytes = 0;
3821 q_vector->rx.total_packets = 0;
3822 q_vector->tx.total_bytes = 0;
3823 q_vector->tx.total_packets = 0;
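/* Worked example, for illustration only: a queue carrying full-sized
 * 1500-byte frames yields avg_wire_size = 1500 + 24 = 1524, which is
 * above the mid-size boost range, so new_val = 1524 / 2 = 762, a
 * longer ITR interval (fewer interrupts per second) than the
 * IGB_20K_ITR floor used for small latency-sensitive packets.
 */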
3827 * igb_update_itr - update the dynamic ITR value based on statistics
3828 * Stores a new ITR value based on packets and byte
3829 * counts during the last interrupt. The advantage of per interrupt
3830 * computation is faster updates and more accurate ITR for the current
3831 * traffic pattern. Constants in this function were computed
3832 * based on theoretical maximum wire speed and thresholds were set based
3833 * on testing data as well as attempting to minimize response time
3834 * while increasing bulk throughput.
3835 * This functionality is controlled by the InterruptThrottleRate module
3836 * parameter (see igb_param.c)
3837 * NOTE: These calculations are only valid when operating in a single-
3838 * queue environment.
3839 * @q_vector: pointer to q_vector
3840 * @ring_container: ring info to update the itr for
3842 static void igb_update_itr(struct igb_q_vector *q_vector,
3843 struct igb_ring_container *ring_container)
3845 unsigned int packets = ring_container->total_packets;
3846 unsigned int bytes = ring_container->total_bytes;
3847 u8 itrval = ring_container->itr;
3849 /* no packets, exit with status unchanged */
3850 if (packets == 0)
3851 return;
3853 switch (itrval) {
3854 case lowest_latency:
3855 /* handle TSO and jumbo frames */
3856 if (bytes/packets > 8000)
3857 itrval = bulk_latency;
3858 else if ((packets < 5) && (bytes > 512))
3859 itrval = low_latency;
3860 break;
3861 case low_latency: /* 50 usec aka 20000 ints/s */
3862 if (bytes > 10000) {
3863 /* this if handles the TSO accounting */
3864 if (bytes/packets > 8000) {
3865 itrval = bulk_latency;
3866 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3867 itrval = bulk_latency;
3868 } else if (packets > 35) {
3869 itrval = lowest_latency;
3871 } else if (bytes/packets > 2000) {
3872 itrval = bulk_latency;
3873 } else if (packets <= 2 && bytes < 512) {
3874 itrval = lowest_latency;
3876 break;
3877 case bulk_latency: /* 250 usec aka 4000 ints/s */
3878 if (bytes > 25000) {
3879 if (packets > 35)
3880 itrval = low_latency;
3881 } else if (bytes < 1500) {
3882 itrval = low_latency;
3884 break;
3887 /* clear work counters since we have the values we need */
3888 ring_container->total_bytes = 0;
3889 ring_container->total_packets = 0;
3891 /* write updated itr to ring container */
3892 ring_container->itr = itrval;
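/*
 * Illustrative sketch (editor addition, not driver code): the state
 * machine above reduced to pure arithmetic, so the threshold behaviour
 * can be exercised in userspace.
 */
#if 0
#include <stdio.h>

enum { LOWEST, LOW, BULK };

static int next_latency(int cur, unsigned int packets, unsigned int bytes)
{
	if (!packets)
		return cur;
	switch (cur) {
	case LOWEST:
		if (bytes / packets > 8000)
			return BULK;
		if (packets < 5 && bytes > 512)
			return LOW;
		return cur;
	case LOW:
		if (bytes > 10000) {
			if (bytes / packets > 8000 ||
			    packets < 10 || bytes / packets > 1200)
				return BULK;
			if (packets > 35)
				return LOWEST;
		} else if (bytes / packets > 2000) {
			return BULK;
		} else if (packets <= 2 && bytes < 512) {
			return LOWEST;
		}
		return cur;
	default:	/* BULK */
		if (bytes > 25000) {
			if (packets > 35)
				return LOW;
		} else if (bytes < 1500) {
			return LOW;
		}
		return cur;
	}
}

int main(void)
{
	/* 8 large packets (12000 bytes total) push LOW -> BULK (prints 2) */
	printf("%d\n", next_latency(LOW, 8, 12000));
	return 0;
}
#endif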
3895 static void igb_set_itr(struct igb_q_vector *q_vector)
3897 struct igb_adapter *adapter = q_vector->adapter;
3898 u32 new_itr = q_vector->itr_val;
3899 u8 current_itr = 0;
3901 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3902 if (adapter->link_speed != SPEED_1000) {
3903 current_itr = 0;
3904 new_itr = IGB_4K_ITR;
3905 goto set_itr_now;
3908 igb_update_itr(q_vector, &q_vector->tx);
3909 igb_update_itr(q_vector, &q_vector->rx);
3911 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3913 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3914 if (current_itr == lowest_latency &&
3915 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3916 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3917 current_itr = low_latency;
3919 switch (current_itr) {
3920 /* counts and packets in update_itr are dependent on these numbers */
3921 case lowest_latency:
3922 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3923 break;
3924 case low_latency:
3925 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3926 break;
3927 case bulk_latency:
3928 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3929 break;
3930 default:
3931 break;
3934 set_itr_now:
3935 if (new_itr != q_vector->itr_val) {
3936 /* this attempts to bias the interrupt rate towards Bulk
3937 * by adding intermediate steps when interrupt rate is
3938 * increasing */
3939 new_itr = new_itr > q_vector->itr_val ?
3940 max((new_itr * q_vector->itr_val) /
3941 (new_itr + (q_vector->itr_val >> 2)),
3942 new_itr) :
3943 new_itr;
3944 /* Don't write the value here; it resets the adapter's
3945 * internal timer, and causes us to delay far longer than
3946 * we should between interrupts. Instead, we write the ITR
3947 * value at the beginning of the next interrupt so the timing
3948 * ends up being correct.
3950 q_vector->itr_val = new_itr;
3951 q_vector->set_itr = 1;
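/*
 * Illustrative sketch (editor addition): worked numbers for the damping
 * expression above. Note that for positive operands the intermediate
 * term is always below new_itr, so the max() resolves to new_itr and the
 * "intermediate steps" the comment describes never actually apply.
 */
#if 0
#include <stdio.h>

int main(void)
{
	/* e.g. stepping from 20K ints/s (196) toward 4K ints/s (980) */
	unsigned int old_itr = 196, new_itr = 980;
	unsigned int step = (new_itr * old_itr) / (new_itr + (old_itr >> 2));

	/* prints: step=186 chosen=980 */
	printf("step=%u chosen=%u\n", step,
	       step > new_itr ? step : new_itr);
	return 0;
}
#endif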
3955 void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
3956 u32 type_tucmd, u32 mss_l4len_idx)
3958 struct e1000_adv_tx_context_desc *context_desc;
3959 u16 i = tx_ring->next_to_use;
3961 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
3963 i++;
3964 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
3966 /* set bits to identify this as an advanced context descriptor */
3967 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3969 /* For 82575, context index must be unique per ring. */
3970 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
3971 mss_l4len_idx |= tx_ring->reg_idx << 4;
3973 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
3974 context_desc->seqnum_seed = 0;
3975 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
3976 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3979 static int igb_tso(struct igb_ring *tx_ring,
3980 struct igb_tx_buffer *first,
3981 u8 *hdr_len)
3983 struct sk_buff *skb = first->skb;
3984 u32 vlan_macip_lens, type_tucmd;
3985 u32 mss_l4len_idx, l4len;
3987 if (!skb_is_gso(skb))
3988 return 0;
3990 if (skb_header_cloned(skb)) {
3991 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3992 if (err)
3993 return err;
3996 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3997 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
3999 if (first->protocol == __constant_htons(ETH_P_IP)) {
4000 struct iphdr *iph = ip_hdr(skb);
4001 iph->tot_len = 0;
4002 iph->check = 0;
4003 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4004 iph->daddr, 0,
4005 IPPROTO_TCP,
4006 0);
4007 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4008 first->tx_flags |= IGB_TX_FLAGS_TSO |
4009 IGB_TX_FLAGS_CSUM |
4010 IGB_TX_FLAGS_IPV4;
4011 } else if (skb_is_gso_v6(skb)) {
4012 ipv6_hdr(skb)->payload_len = 0;
4013 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4014 &ipv6_hdr(skb)->daddr,
4015 0, IPPROTO_TCP, 0);
4016 first->tx_flags |= IGB_TX_FLAGS_TSO |
4017 IGB_TX_FLAGS_CSUM;
4020 /* compute header lengths */
4021 l4len = tcp_hdrlen(skb);
4022 *hdr_len = skb_transport_offset(skb) + l4len;
4024 /* update gso size and bytecount with header size */
4025 first->gso_segs = skb_shinfo(skb)->gso_segs;
4026 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4028 /* MSS L4LEN IDX */
4029 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4030 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4032 /* VLAN MACLEN IPLEN */
4033 vlan_macip_lens = skb_network_header_len(skb);
4034 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4035 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4037 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4039 return 1;
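/*
 * Illustrative sketch (editor addition): how the context-descriptor
 * fields above pack. The shift values are assumed to mirror the
 * E1000_ADVTXD_* macros (L4LEN 8, MSS 16, MACLEN 9) -- confirm against
 * e1000_82575.h before relying on them.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define L4LEN_SHIFT	8
#define MSS_SHIFT	16
#define MACLEN_SHIFT	9

int main(void)
{
	uint32_t l4len = 20, mss = 1448;	/* TCP header len, MSS */
	uint32_t iplen = 20, mac_off = 14;	/* IPv4 header len, L2 len */

	uint32_t mss_l4len = (l4len << L4LEN_SHIFT) | (mss << MSS_SHIFT);
	uint32_t vlan_macip = iplen | (mac_off << MACLEN_SHIFT);

	/* prints: mss_l4len_idx=05a81400 vlan_macip_lens=00001c14 */
	printf("mss_l4len_idx=%08x vlan_macip_lens=%08x\n",
	       (unsigned)mss_l4len, (unsigned)vlan_macip);
	return 0;
}
#endif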
4042 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4044 struct sk_buff *skb = first->skb;
4045 u32 vlan_macip_lens = 0;
4046 u32 mss_l4len_idx = 0;
4047 u32 type_tucmd = 0;
4049 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4050 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4051 return;
4052 } else {
4053 u8 l4_hdr = 0;
4054 switch (first->protocol) {
4055 case __constant_htons(ETH_P_IP):
4056 vlan_macip_lens |= skb_network_header_len(skb);
4057 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4058 l4_hdr = ip_hdr(skb)->protocol;
4059 break;
4060 case __constant_htons(ETH_P_IPV6):
4061 vlan_macip_lens |= skb_network_header_len(skb);
4062 l4_hdr = ipv6_hdr(skb)->nexthdr;
4063 break;
4064 default:
4065 if (unlikely(net_ratelimit())) {
4066 dev_warn(tx_ring->dev,
4067 "partial checksum but proto=%x!\n",
4068 first->protocol);
4070 break;
4073 switch (l4_hdr) {
4074 case IPPROTO_TCP:
4075 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4076 mss_l4len_idx = tcp_hdrlen(skb) <<
4077 E1000_ADVTXD_L4LEN_SHIFT;
4078 break;
4079 case IPPROTO_SCTP:
4080 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4081 mss_l4len_idx = sizeof(struct sctphdr) <<
4082 E1000_ADVTXD_L4LEN_SHIFT;
4083 break;
4084 case IPPROTO_UDP:
4085 mss_l4len_idx = sizeof(struct udphdr) <<
4086 E1000_ADVTXD_L4LEN_SHIFT;
4087 break;
4088 default:
4089 if (unlikely(net_ratelimit())) {
4090 dev_warn(tx_ring->dev,
4091 "partial checksum but l4 proto=%x!\n",
4092 l4_hdr);
4094 break;
4097 /* update TX checksum flag */
4098 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4101 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4102 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4104 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4107 static __le32 igb_tx_cmd_type(u32 tx_flags)
4109 /* set type for advanced descriptor with frame checksum insertion */
4110 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4111 E1000_ADVTXD_DCMD_IFCS |
4112 E1000_ADVTXD_DCMD_DEXT);
4114 /* set HW vlan bit if vlan is present */
4115 if (tx_flags & IGB_TX_FLAGS_VLAN)
4116 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4118 /* set timestamp bit if present */
4119 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4120 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4122 /* set segmentation bits for TSO */
4123 if (tx_flags & IGB_TX_FLAGS_TSO)
4124 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4126 return cmd_type;
4129 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4130 union e1000_adv_tx_desc *tx_desc,
4131 u32 tx_flags, unsigned int paylen)
4133 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4135 /* 82575 requires a unique index per ring if any offload is enabled */
4136 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4137 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4138 olinfo_status |= tx_ring->reg_idx << 4;
4140 /* insert L4 checksum */
4141 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4142 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4144 /* insert IPv4 checksum */
4145 if (tx_flags & IGB_TX_FLAGS_IPV4)
4146 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4149 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4153 * The largest size we can write to the descriptor is 65535. In order to
4154 * maintain a power of two alignment we have to limit ourselves to 32K.
4156 #define IGB_MAX_TXD_PWR 15
4157 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
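/*
 * Illustrative sketch (editor addition): how igb_tx_map() below chops a
 * fragment larger than IGB_MAX_DATA_PER_TXD into multiple descriptors.
 */
#if 0
#include <stdio.h>

#define MAX_PER_TXD (1u << 15)	/* mirrors IGB_MAX_DATA_PER_TXD */

int main(void)
{
	unsigned int size = 100000, descs = 0;	/* e.g. a 100KB fragment */

	while (size > MAX_PER_TXD) {
		size -= MAX_PER_TXD;
		descs++;
	}
	descs++;	/* the final, partial chunk */

	/* prints: descriptors used: 4 (last carries 1696 bytes) */
	printf("descriptors used: %u (last carries %u bytes)\n",
	       descs, size);
	return 0;
}
#endif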
4159 static void igb_tx_map(struct igb_ring *tx_ring,
4160 struct igb_tx_buffer *first,
4161 const u8 hdr_len)
4163 struct sk_buff *skb = first->skb;
4164 struct igb_tx_buffer *tx_buffer_info;
4165 union e1000_adv_tx_desc *tx_desc;
4166 dma_addr_t dma;
4167 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4168 unsigned int data_len = skb->data_len;
4169 unsigned int size = skb_headlen(skb);
4170 unsigned int paylen = skb->len - hdr_len;
4171 __le32 cmd_type;
4172 u32 tx_flags = first->tx_flags;
4173 u16 i = tx_ring->next_to_use;
4175 tx_desc = IGB_TX_DESC(tx_ring, i);
4177 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4178 cmd_type = igb_tx_cmd_type(tx_flags);
4180 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4181 if (dma_mapping_error(tx_ring->dev, dma))
4182 goto dma_error;
4184 /* record length, and DMA address */
4185 first->length = size;
4186 first->dma = dma;
4187 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4189 for (;;) {
4190 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4191 tx_desc->read.cmd_type_len =
4192 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4194 i++;
4195 tx_desc++;
4196 if (i == tx_ring->count) {
4197 tx_desc = IGB_TX_DESC(tx_ring, 0);
4198 i = 0;
4201 dma += IGB_MAX_DATA_PER_TXD;
4202 size -= IGB_MAX_DATA_PER_TXD;
4204 tx_desc->read.olinfo_status = 0;
4205 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4208 if (likely(!data_len))
4209 break;
4211 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4213 i++;
4214 tx_desc++;
4215 if (i == tx_ring->count) {
4216 tx_desc = IGB_TX_DESC(tx_ring, 0);
4217 i = 0;
4220 size = skb_frag_size(frag);
4221 data_len -= size;
4223 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4224 size, DMA_TO_DEVICE);
4225 if (dma_mapping_error(tx_ring->dev, dma))
4226 goto dma_error;
4228 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4229 tx_buffer_info->length = size;
4230 tx_buffer_info->dma = dma;
4232 tx_desc->read.olinfo_status = 0;
4233 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4235 frag++;
4238 /* write last descriptor with RS and EOP bits */
4239 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4240 tx_desc->read.cmd_type_len = cmd_type;
4242 /* set the timestamp */
4243 first->time_stamp = jiffies;
4246 * Force memory writes to complete before letting h/w know there
4247 * are new descriptors to fetch. (Only applicable for weak-ordered
4248 * memory model archs, such as IA-64).
4250 * We also need this memory barrier to make certain all of the
4251 * status bits have been updated before next_to_watch is written.
4253 wmb();
4255 /* set next_to_watch value indicating a packet is present */
4256 first->next_to_watch = tx_desc;
4258 i++;
4259 if (i == tx_ring->count)
4260 i = 0;
4262 tx_ring->next_to_use = i;
4264 writel(i, tx_ring->tail);
4266 /* we need this if more than one processor can write to our tail
4267 * at a time; it synchronizes IO on IA64/Altix systems */
4268 mmiowb();
4270 return;
4272 dma_error:
4273 dev_err(tx_ring->dev, "TX DMA map failed\n");
4275 /* clear dma mappings for failed tx_buffer_info map */
4276 for (;;) {
4277 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4278 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4279 if (tx_buffer_info == first)
4280 break;
4281 if (i == 0)
4282 i = tx_ring->count;
4283 i--;
4286 tx_ring->next_to_use = i;
4289 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4291 struct net_device *netdev = tx_ring->netdev;
4293 netif_stop_subqueue(netdev, tx_ring->queue_index);
4295 /* Herbert's original patch had:
4296 * smp_mb__after_netif_stop_queue();
4297 * but since that doesn't exist yet, just open code it. */
4298 smp_mb();
4300 /* We need to check again in case another CPU has just
4301 * made room available. */
4302 if (igb_desc_unused(tx_ring) < size)
4303 return -EBUSY;
4305 /* A reprieve! */
4306 netif_wake_subqueue(netdev, tx_ring->queue_index);
4308 u64_stats_update_begin(&tx_ring->tx_syncp2);
4309 tx_ring->tx_stats.restart_queue2++;
4310 u64_stats_update_end(&tx_ring->tx_syncp2);
4312 return 0;
4315 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4317 if (igb_desc_unused(tx_ring) >= size)
4318 return 0;
4319 return __igb_maybe_stop_tx(tx_ring, size);
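/*
 * Illustrative sketch (editor addition): the stop/re-check/wake dance
 * above, modelled single-threaded with a fake ring. In the driver the
 * smp_mb() orders the stop against the re-check so a concurrent cleanup
 * freeing descriptors cannot be missed.
 */
#if 0
#include <stdio.h>
#include <stdbool.h>

struct fake_ring { unsigned int count, next_to_use, next_to_clean; };

/* same arithmetic as igb_desc_unused(): clean - use - 1, with wrap */
static unsigned int desc_unused(const struct fake_ring *r)
{
	return (r->next_to_clean > r->next_to_use ? 0 : r->count) +
	       r->next_to_clean - r->next_to_use - 1;
}

static int maybe_stop(struct fake_ring *r, unsigned int need, bool *stopped)
{
	if (desc_unused(r) >= need)
		return 0;
	*stopped = true;	/* netif_stop_subqueue() */
	/* smp_mb() here in the driver, then re-check */
	if (desc_unused(r) < need)
		return -1;	/* -EBUSY: stay stopped */
	*stopped = false;	/* netif_wake_subqueue(): a reprieve */
	return 0;
}

int main(void)
{
	struct fake_ring r = { 256, 250, 4 };	/* only 9 free slots */
	bool stopped = false;
	int rc = maybe_stop(&r, 21, &stopped);

	/* prints: unused=9 rc=-1 stopped=1 */
	printf("unused=%u rc=%d stopped=%d\n", desc_unused(&r), rc, stopped);
	return 0;
}
#endif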
4322 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4323 struct igb_ring *tx_ring)
4325 struct igb_tx_buffer *first;
4326 int tso;
4327 u32 tx_flags = 0;
4328 __be16 protocol = vlan_get_protocol(skb);
4329 u8 hdr_len = 0;
4331 /* need: 1 descriptor per page,
4332 * + 2 desc gap to keep tail from touching head,
4333 * + 1 desc for skb->data,
4334 * + 1 desc for context descriptor,
4335 * otherwise try next time */
4336 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4337 /* this is a hard error */
4338 return NETDEV_TX_BUSY;
4341 /* record the location of the first descriptor for this packet */
4342 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4343 first->skb = skb;
4344 first->bytecount = skb->len;
4345 first->gso_segs = 1;
4347 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4348 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4349 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4352 if (vlan_tx_tag_present(skb)) {
4353 tx_flags |= IGB_TX_FLAGS_VLAN;
4354 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4357 /* record initial flags and protocol */
4358 first->tx_flags = tx_flags;
4359 first->protocol = protocol;
4361 tso = igb_tso(tx_ring, first, &hdr_len);
4362 if (tso < 0)
4363 goto out_drop;
4364 else if (!tso)
4365 igb_tx_csum(tx_ring, first);
4367 igb_tx_map(tx_ring, first, hdr_len);
4369 /* Make sure there is space in the ring for the next send. */
4370 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4372 return NETDEV_TX_OK;
4374 out_drop:
4375 igb_unmap_and_free_tx_resource(tx_ring, first);
4377 return NETDEV_TX_OK;
4380 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4381 struct sk_buff *skb)
4383 unsigned int r_idx = skb->queue_mapping;
4385 if (r_idx >= adapter->num_tx_queues)
4386 r_idx = r_idx % adapter->num_tx_queues;
4388 return adapter->tx_ring[r_idx];
4391 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4392 struct net_device *netdev)
4394 struct igb_adapter *adapter = netdev_priv(netdev);
4396 if (test_bit(__IGB_DOWN, &adapter->state)) {
4397 dev_kfree_skb_any(skb);
4398 return NETDEV_TX_OK;
4401 if (skb->len <= 0) {
4402 dev_kfree_skb_any(skb);
4403 return NETDEV_TX_OK;
4407 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4408 * in order to meet this minimum size requirement.
4410 if (skb->len < 17) {
4411 if (skb_padto(skb, 17))
4412 return NETDEV_TX_OK;
4413 skb->len = 17;
4416 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4420 * igb_tx_timeout - Respond to a Tx Hang
4421 * @netdev: network interface device structure
4423 static void igb_tx_timeout(struct net_device *netdev)
4425 struct igb_adapter *adapter = netdev_priv(netdev);
4426 struct e1000_hw *hw = &adapter->hw;
4428 /* Do the reset outside of interrupt context */
4429 adapter->tx_timeout_count++;
4431 if (hw->mac.type >= e1000_82580)
4432 hw->dev_spec._82575.global_device_reset = true;
4434 schedule_work(&adapter->reset_task);
4435 wr32(E1000_EICS,
4436 (adapter->eims_enable_mask & ~adapter->eims_other));
4439 static void igb_reset_task(struct work_struct *work)
4441 struct igb_adapter *adapter;
4442 adapter = container_of(work, struct igb_adapter, reset_task);
4444 igb_dump(adapter);
4445 netdev_err(adapter->netdev, "Reset adapter\n");
4446 igb_reinit_locked(adapter);
4450 * igb_get_stats64 - Get System Network Statistics
4451 * @netdev: network interface device structure
4452 * @stats: rtnl_link_stats64 pointer
4455 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4456 struct rtnl_link_stats64 *stats)
4458 struct igb_adapter *adapter = netdev_priv(netdev);
4460 spin_lock(&adapter->stats64_lock);
4461 igb_update_stats(adapter, &adapter->stats64);
4462 memcpy(stats, &adapter->stats64, sizeof(*stats));
4463 spin_unlock(&adapter->stats64_lock);
4465 return stats;
4469 * igb_change_mtu - Change the Maximum Transfer Unit
4470 * @netdev: network interface device structure
4471 * @new_mtu: new value for maximum frame size
4473 * Returns 0 on success, negative on failure
4475 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4477 struct igb_adapter *adapter = netdev_priv(netdev);
4478 struct pci_dev *pdev = adapter->pdev;
4479 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4481 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4482 dev_err(&pdev->dev, "Invalid MTU setting\n");
4483 return -EINVAL;
4486 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4487 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4488 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4489 return -EINVAL;
4492 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4493 msleep(1);
4495 /* igb_down has a dependency on max_frame_size */
4496 adapter->max_frame_size = max_frame;
4498 if (netif_running(netdev))
4499 igb_down(adapter);
4501 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4502 netdev->mtu, new_mtu);
4503 netdev->mtu = new_mtu;
4505 if (netif_running(netdev))
4506 igb_up(adapter);
4507 else
4508 igb_reset(adapter);
4510 clear_bit(__IGB_RESETTING, &adapter->state);
4512 return 0;
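/*
 * Illustrative sketch (editor addition): the MTU -> max_frame arithmetic
 * above with the standard header lengths, showing why the 9238 cap in
 * the code corresponds to the "MTU > 9216" message.
 */
#if 0
#include <stdio.h>

#define ETH_HLEN	14	/* dst mac + src mac + ethertype */
#define ETH_FCS_LEN	4	/* frame check sequence (CRC) */
#define VLAN_HLEN	4	/* 802.1Q tag */

int main(void)
{
	int new_mtu = 1500;
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;

	/* prints: max_frame=1522 (a VLAN-tagged standard frame) */
	printf("max_frame=%d\n", max_frame);
	/* prints: jumbo mtu cap=9216 */
	printf("jumbo mtu cap=%d\n",
	       9238 - ETH_HLEN - ETH_FCS_LEN - VLAN_HLEN);
	return 0;
}
#endif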
4516 * igb_update_stats - Update the board statistics counters
4517 * @adapter: board private structure
4520 void igb_update_stats(struct igb_adapter *adapter,
4521 struct rtnl_link_stats64 *net_stats)
4523 struct e1000_hw *hw = &adapter->hw;
4524 struct pci_dev *pdev = adapter->pdev;
4525 u32 reg, mpc;
4526 u16 phy_tmp;
4527 int i;
4528 u64 bytes, packets;
4529 unsigned int start;
4530 u64 _bytes, _packets;
4532 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4535 * Prevent stats update while adapter is being reset, or if the pci
4536 * connection is down.
4538 if (adapter->link_speed == 0)
4539 return;
4540 if (pci_channel_offline(pdev))
4541 return;
4543 bytes = 0;
4544 packets = 0;
4545 for (i = 0; i < adapter->num_rx_queues; i++) {
4546 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4547 struct igb_ring *ring = adapter->rx_ring[i];
4549 ring->rx_stats.drops += rqdpc_tmp;
4550 net_stats->rx_fifo_errors += rqdpc_tmp;
4552 do {
4553 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4554 _bytes = ring->rx_stats.bytes;
4555 _packets = ring->rx_stats.packets;
4556 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4557 bytes += _bytes;
4558 packets += _packets;
4561 net_stats->rx_bytes = bytes;
4562 net_stats->rx_packets = packets;
4564 bytes = 0;
4565 packets = 0;
4566 for (i = 0; i < adapter->num_tx_queues; i++) {
4567 struct igb_ring *ring = adapter->tx_ring[i];
4568 do {
4569 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4570 _bytes = ring->tx_stats.bytes;
4571 _packets = ring->tx_stats.packets;
4572 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4573 bytes += _bytes;
4574 packets += _packets;
4576 net_stats->tx_bytes = bytes;
4577 net_stats->tx_packets = packets;
4579 /* read stats registers */
4580 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4581 adapter->stats.gprc += rd32(E1000_GPRC);
4582 adapter->stats.gorc += rd32(E1000_GORCL);
4583 rd32(E1000_GORCH); /* clear GORCL */
4584 adapter->stats.bprc += rd32(E1000_BPRC);
4585 adapter->stats.mprc += rd32(E1000_MPRC);
4586 adapter->stats.roc += rd32(E1000_ROC);
4588 adapter->stats.prc64 += rd32(E1000_PRC64);
4589 adapter->stats.prc127 += rd32(E1000_PRC127);
4590 adapter->stats.prc255 += rd32(E1000_PRC255);
4591 adapter->stats.prc511 += rd32(E1000_PRC511);
4592 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4593 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4594 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4595 adapter->stats.sec += rd32(E1000_SEC);
4597 mpc = rd32(E1000_MPC);
4598 adapter->stats.mpc += mpc;
4599 net_stats->rx_fifo_errors += mpc;
4600 adapter->stats.scc += rd32(E1000_SCC);
4601 adapter->stats.ecol += rd32(E1000_ECOL);
4602 adapter->stats.mcc += rd32(E1000_MCC);
4603 adapter->stats.latecol += rd32(E1000_LATECOL);
4604 adapter->stats.dc += rd32(E1000_DC);
4605 adapter->stats.rlec += rd32(E1000_RLEC);
4606 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4607 adapter->stats.xontxc += rd32(E1000_XONTXC);
4608 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4609 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4610 adapter->stats.fcruc += rd32(E1000_FCRUC);
4611 adapter->stats.gptc += rd32(E1000_GPTC);
4612 adapter->stats.gotc += rd32(E1000_GOTCL);
4613 rd32(E1000_GOTCH); /* clear GOTCL */
4614 adapter->stats.rnbc += rd32(E1000_RNBC);
4615 adapter->stats.ruc += rd32(E1000_RUC);
4616 adapter->stats.rfc += rd32(E1000_RFC);
4617 adapter->stats.rjc += rd32(E1000_RJC);
4618 adapter->stats.tor += rd32(E1000_TORH);
4619 adapter->stats.tot += rd32(E1000_TOTH);
4620 adapter->stats.tpr += rd32(E1000_TPR);
4622 adapter->stats.ptc64 += rd32(E1000_PTC64);
4623 adapter->stats.ptc127 += rd32(E1000_PTC127);
4624 adapter->stats.ptc255 += rd32(E1000_PTC255);
4625 adapter->stats.ptc511 += rd32(E1000_PTC511);
4626 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4627 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4629 adapter->stats.mptc += rd32(E1000_MPTC);
4630 adapter->stats.bptc += rd32(E1000_BPTC);
4632 adapter->stats.tpt += rd32(E1000_TPT);
4633 adapter->stats.colc += rd32(E1000_COLC);
4635 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4636 /* read internal phy specific stats */
4637 reg = rd32(E1000_CTRL_EXT);
4638 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4639 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4640 adapter->stats.tncrs += rd32(E1000_TNCRS);
4643 adapter->stats.tsctc += rd32(E1000_TSCTC);
4644 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4646 adapter->stats.iac += rd32(E1000_IAC);
4647 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4648 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4649 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4650 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4651 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4652 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4653 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4654 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4656 /* Fill out the OS statistics structure */
4657 net_stats->multicast = adapter->stats.mprc;
4658 net_stats->collisions = adapter->stats.colc;
4660 /* Rx Errors */
4662 /* RLEC on some newer hardware can be incorrect, so build
4663 * our own version based on RUC and ROC */
4664 net_stats->rx_errors = adapter->stats.rxerrc +
4665 adapter->stats.crcerrs + adapter->stats.algnerrc +
4666 adapter->stats.ruc + adapter->stats.roc +
4667 adapter->stats.cexterr;
4668 net_stats->rx_length_errors = adapter->stats.ruc +
4669 adapter->stats.roc;
4670 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4671 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4672 net_stats->rx_missed_errors = adapter->stats.mpc;
4674 /* Tx Errors */
4675 net_stats->tx_errors = adapter->stats.ecol +
4676 adapter->stats.latecol;
4677 net_stats->tx_aborted_errors = adapter->stats.ecol;
4678 net_stats->tx_window_errors = adapter->stats.latecol;
4679 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4681 /* Tx Dropped needs to be maintained elsewhere */
4683 /* Phy Stats */
4684 if (hw->phy.media_type == e1000_media_type_copper) {
4685 if ((adapter->link_speed == SPEED_1000) &&
4686 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4687 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4688 adapter->phy_stats.idle_errors += phy_tmp;
4692 /* Management Stats */
4693 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4694 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4695 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4697 /* OS2BMC Stats */
4698 reg = rd32(E1000_MANC);
4699 if (reg & E1000_MANC_EN_BMC2OS) {
4700 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4701 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4702 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4703 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
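/*
 * Illustrative sketch (editor addition): the u64_stats fetch/retry loops
 * used above are a seqcount on 32-bit SMP -- the writer bumps a sequence
 * number around each update and the reader retries if it moved. A
 * simplified, single-threaded model of the reader side:
 */
#if 0
#include <stdio.h>

struct stats { unsigned int seq; unsigned long long bytes, packets; };

static unsigned int fetch_begin(const struct stats *s)
{
	return s->seq & ~1u;	/* odd seq would mean a write in flight */
}

static int fetch_retry(const struct stats *s, unsigned int start)
{
	return s->seq != start;
}

int main(void)
{
	struct stats s = { 2, 123456, 789 };
	unsigned long long b, p;
	unsigned int start;

	do {
		start = fetch_begin(&s);
		b = s.bytes;
		p = s.packets;
	} while (fetch_retry(&s, start));

	/* prints: bytes=123456 packets=789 */
	printf("bytes=%llu packets=%llu\n", b, p);
	return 0;
}
#endif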
4707 static irqreturn_t igb_msix_other(int irq, void *data)
4709 struct igb_adapter *adapter = data;
4710 struct e1000_hw *hw = &adapter->hw;
4711 u32 icr = rd32(E1000_ICR);
4712 /* reading ICR causes bit 31 of EICR to be cleared */
4714 if (icr & E1000_ICR_DRSTA)
4715 schedule_work(&adapter->reset_task);
4717 if (icr & E1000_ICR_DOUTSYNC) {
4718 /* HW is reporting DMA is out of sync */
4719 adapter->stats.doosync++;
4720 /* The DMA Out of Sync is also an indication of a spoof event
4721 * in IOV mode. Check the Wrong VM Behavior register to
4722 * see if it is really a spoof event. */
4723 igb_check_wvbr(adapter);
4726 /* Check for a mailbox event */
4727 if (icr & E1000_ICR_VMMB)
4728 igb_msg_task(adapter);
4730 if (icr & E1000_ICR_LSC) {
4731 hw->mac.get_link_status = 1;
4732 /* guard against interrupt when we're going down */
4733 if (!test_bit(__IGB_DOWN, &adapter->state))
4734 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4737 wr32(E1000_EIMS, adapter->eims_other);
4739 return IRQ_HANDLED;
4742 static void igb_write_itr(struct igb_q_vector *q_vector)
4744 struct igb_adapter *adapter = q_vector->adapter;
4745 u32 itr_val = q_vector->itr_val & 0x7FFC;
4747 if (!q_vector->set_itr)
4748 return;
4750 if (!itr_val)
4751 itr_val = 0x4;
4753 if (adapter->hw.mac.type == e1000_82575)
4754 itr_val |= itr_val << 16;
4755 else
4756 itr_val |= E1000_EITR_CNT_IGNR;
4758 writel(itr_val, q_vector->itr_register);
4759 q_vector->set_itr = 0;
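/*
 * Illustrative sketch (editor addition): the EITR encoding above. Bits
 * 2-14 carry the interval (hence the & 0x7FFC); 82575 wants the value
 * mirrored into the high word, while later parts take a counter-ignore
 * flag instead.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned int itr_val = 196 & 0x7FFC;	/* e.g. the 20K ints/s value */

	if (!itr_val)
		itr_val = 0x4;			/* never write zero */

	/* prints: 82575 EITR=00c400c4 */
	printf("82575 EITR=%08x\n", itr_val | (itr_val << 16));
	/* prints: 82576+ EITR=000000c4 (plus E1000_EITR_CNT_IGNR) */
	printf("82576+ EITR=%08x (plus E1000_EITR_CNT_IGNR)\n", itr_val);
	return 0;
}
#endif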
4762 static irqreturn_t igb_msix_ring(int irq, void *data)
4764 struct igb_q_vector *q_vector = data;
4766 /* Write the ITR value calculated from the previous interrupt. */
4767 igb_write_itr(q_vector);
4769 napi_schedule(&q_vector->napi);
4771 return IRQ_HANDLED;
4774 #ifdef CONFIG_IGB_DCA
4775 static void igb_update_dca(struct igb_q_vector *q_vector)
4777 struct igb_adapter *adapter = q_vector->adapter;
4778 struct e1000_hw *hw = &adapter->hw;
4779 int cpu = get_cpu();
4781 if (q_vector->cpu == cpu)
4782 goto out_no_update;
4784 if (q_vector->tx.ring) {
4785 int q = q_vector->tx.ring->reg_idx;
4786 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4787 if (hw->mac.type == e1000_82575) {
4788 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4789 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4790 } else {
4791 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4792 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4793 E1000_DCA_TXCTRL_CPUID_SHIFT;
4795 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4796 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4798 if (q_vector->rx.ring) {
4799 int q = q_vector->rx.ring->reg_idx;
4800 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4801 if (hw->mac.type == e1000_82575) {
4802 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4803 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4804 } else {
4805 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4806 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4807 E1000_DCA_RXCTRL_CPUID_SHIFT;
4809 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4810 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4811 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4812 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4814 q_vector->cpu = cpu;
4815 out_no_update:
4816 put_cpu();
4819 static void igb_setup_dca(struct igb_adapter *adapter)
4821 struct e1000_hw *hw = &adapter->hw;
4822 int i;
4824 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4825 return;
4827 /* Always use CB2 mode, difference is masked in the CB driver. */
4828 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4830 for (i = 0; i < adapter->num_q_vectors; i++) {
4831 adapter->q_vector[i]->cpu = -1;
4832 igb_update_dca(adapter->q_vector[i]);
4836 static int __igb_notify_dca(struct device *dev, void *data)
4838 struct net_device *netdev = dev_get_drvdata(dev);
4839 struct igb_adapter *adapter = netdev_priv(netdev);
4840 struct pci_dev *pdev = adapter->pdev;
4841 struct e1000_hw *hw = &adapter->hw;
4842 unsigned long event = *(unsigned long *)data;
4844 switch (event) {
4845 case DCA_PROVIDER_ADD:
4846 /* if already enabled, don't do it again */
4847 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4848 break;
4849 if (dca_add_requester(dev) == 0) {
4850 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4851 dev_info(&pdev->dev, "DCA enabled\n");
4852 igb_setup_dca(adapter);
4853 break;
4855 /* Fall Through since DCA is disabled. */
4856 case DCA_PROVIDER_REMOVE:
4857 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4858 /* without this, a class_device is left
4859 * hanging around in the sysfs model */
4860 dca_remove_requester(dev);
4861 dev_info(&pdev->dev, "DCA disabled\n");
4862 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4863 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4865 break;
4868 return 0;
4871 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4872 void *p)
4874 int ret_val;
4876 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4877 __igb_notify_dca);
4879 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4881 #endif /* CONFIG_IGB_DCA */
4883 #ifdef CONFIG_PCI_IOV
4884 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4886 unsigned char mac_addr[ETH_ALEN];
4887 struct pci_dev *pdev = adapter->pdev;
4888 struct e1000_hw *hw = &adapter->hw;
4889 struct pci_dev *pvfdev;
4890 unsigned int device_id;
4891 u16 thisvf_devfn;
4893 random_ether_addr(mac_addr);
4894 igb_set_vf_mac(adapter, vf, mac_addr);
4896 switch (adapter->hw.mac.type) {
4897 case e1000_82576:
4898 device_id = IGB_82576_VF_DEV_ID;
4899 /* VF Stride for 82576 is 2 */
4900 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4901 (pdev->devfn & 1);
4902 break;
4903 case e1000_i350:
4904 device_id = IGB_I350_VF_DEV_ID;
4905 /* VF Stride for I350 is 4 */
4906 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4907 (pdev->devfn & 3);
4908 break;
4909 default:
4910 device_id = 0;
4911 thisvf_devfn = 0;
4912 break;
4915 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4916 while (pvfdev) {
4917 if (pvfdev->devfn == thisvf_devfn)
4918 break;
4919 pvfdev = pci_get_device(hw->vendor_id,
4920 device_id, pvfdev);
4923 if (pvfdev)
4924 adapter->vf_data[vf].vfdev = pvfdev;
4925 else
4926 dev_err(&pdev->dev,
4927 "Couldn't find pci dev ptr for VF %4.4x\n",
4928 thisvf_devfn);
4929 return pvfdev != NULL;
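/*
 * Illustrative sketch (editor addition): the VF devfn arithmetic above.
 * VFs start 0x80 past the PF and are spaced by the per-part stride.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned int pf_devfn = 0;	/* PF at device 0, function 0 */
	unsigned int vf;

	/* 82576, stride 2: prints 0x80 0x82 0x84 0x86 */
	for (vf = 0; vf < 4; vf++)
		printf("82576 vf%u devfn=%#x\n", vf,
		       (pf_devfn + 0x80 + (vf << 1)) | (pf_devfn & 1));

	/* i350, stride 4: prints 0x80 0x84 0x88 0x8c */
	for (vf = 0; vf < 4; vf++)
		printf("i350  vf%u devfn=%#x\n", vf,
		       (pf_devfn + 0x80 + (vf << 2)) | (pf_devfn & 3));
	return 0;
}
#endif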
4932 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4934 struct e1000_hw *hw = &adapter->hw;
4935 struct pci_dev *pdev = adapter->pdev;
4936 struct pci_dev *pvfdev;
4937 u16 vf_devfn = 0;
4938 u16 vf_stride;
4939 unsigned int device_id;
4940 int vfs_found = 0;
4942 switch (adapter->hw.mac.type) {
4943 case e1000_82576:
4944 device_id = IGB_82576_VF_DEV_ID;
4945 /* VF Stride for 82576 is 2 */
4946 vf_stride = 2;
4947 break;
4948 case e1000_i350:
4949 device_id = IGB_I350_VF_DEV_ID;
4950 /* VF Stride for I350 is 4 */
4951 vf_stride = 4;
4952 break;
4953 default:
4954 device_id = 0;
4955 vf_stride = 0;
4956 break;
4959 vf_devfn = pdev->devfn + 0x80;
4960 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4961 while (pvfdev) {
4962 if (pvfdev->devfn == vf_devfn)
4963 vfs_found++;
4964 vf_devfn += vf_stride;
4965 pvfdev = pci_get_device(hw->vendor_id,
4966 device_id, pvfdev);
4969 return vfs_found;
4972 static int igb_check_vf_assignment(struct igb_adapter *adapter)
4974 int i;
4975 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4976 if (adapter->vf_data[i].vfdev) {
4977 if (adapter->vf_data[i].vfdev->dev_flags &
4978 PCI_DEV_FLAGS_ASSIGNED)
4979 return true;
4982 return false;
4985 #endif
4986 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4988 struct e1000_hw *hw = &adapter->hw;
4989 u32 ping;
4990 int i;
4992 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4993 ping = E1000_PF_CONTROL_MSG;
4994 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4995 ping |= E1000_VT_MSGTYPE_CTS;
4996 igb_write_mbx(hw, &ping, 1, i);
5000 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5002 struct e1000_hw *hw = &adapter->hw;
5003 u32 vmolr = rd32(E1000_VMOLR(vf));
5004 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5006 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5007 IGB_VF_FLAG_MULTI_PROMISC);
5008 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5010 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5011 vmolr |= E1000_VMOLR_MPME;
5012 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5013 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5014 } else {
5016 * if we have hashes and we are clearing a multicast promisc
5017 * flag we need to write the hashes to the MTA as this step
5018 * was previously skipped
5020 if (vf_data->num_vf_mc_hashes > 30) {
5021 vmolr |= E1000_VMOLR_MPME;
5022 } else if (vf_data->num_vf_mc_hashes) {
5023 int j;
5024 vmolr |= E1000_VMOLR_ROMPE;
5025 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5026 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5030 wr32(E1000_VMOLR(vf), vmolr);
5032 /* if there are flags left unprocessed, they are likely not supported */
5033 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5034 return -EINVAL;
5036 return 0;
5040 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5041 u32 *msgbuf, u32 vf)
5043 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5044 u16 *hash_list = (u16 *)&msgbuf[1];
5045 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5046 int i;
5048 /* salt away the number of multicast addresses assigned
5049 * to this VF for later use to restore when the PF multicast
5050 * list changes
5052 vf_data->num_vf_mc_hashes = n;
5054 /* only up to 30 hash values supported */
5055 if (n > 30)
5056 n = 30;
5058 /* store the hashes for later use */
5059 for (i = 0; i < n; i++)
5060 vf_data->vf_mc_hashes[i] = hash_list[i];
5062 /* Flush and reset the mta with the new values */
5063 igb_set_rx_mode(adapter->netdev);
5065 return 0;
5068 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5070 struct e1000_hw *hw = &adapter->hw;
5071 struct vf_data_storage *vf_data;
5072 int i, j;
5074 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5075 u32 vmolr = rd32(E1000_VMOLR(i));
5076 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5078 vf_data = &adapter->vf_data[i];
5080 if ((vf_data->num_vf_mc_hashes > 30) ||
5081 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5082 vmolr |= E1000_VMOLR_MPME;
5083 } else if (vf_data->num_vf_mc_hashes) {
5084 vmolr |= E1000_VMOLR_ROMPE;
5085 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5086 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5088 wr32(E1000_VMOLR(i), vmolr);
5092 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5094 struct e1000_hw *hw = &adapter->hw;
5095 u32 pool_mask, reg, vid;
5096 int i;
5098 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5100 /* Find the vlan filter for this id */
5101 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5102 reg = rd32(E1000_VLVF(i));
5104 /* remove the vf from the pool */
5105 reg &= ~pool_mask;
5107 /* if pool is empty then remove entry from vfta */
5108 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5109 (reg & E1000_VLVF_VLANID_ENABLE)) {
5110 vid = reg & E1000_VLVF_VLANID_MASK;
5111 igb_vfta_set(hw, vid, false);
5112 reg = 0;
5115 wr32(E1000_VLVF(i), reg);
5118 adapter->vf_data[vf].vlans_enabled = 0;
5121 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5123 struct e1000_hw *hw = &adapter->hw;
5124 u32 reg, i;
5126 /* The vlvf table only exists on 82576 hardware and newer */
5127 if (hw->mac.type < e1000_82576)
5128 return -1;
5130 /* we only need to do this if VMDq is enabled */
5131 if (!adapter->vfs_allocated_count)
5132 return -1;
5134 /* Find the vlan filter for this id */
5135 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5136 reg = rd32(E1000_VLVF(i));
5137 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5138 vid == (reg & E1000_VLVF_VLANID_MASK))
5139 break;
5142 if (add) {
5143 if (i == E1000_VLVF_ARRAY_SIZE) {
5144 /* Did not find a matching VLAN ID entry that was
5145 * enabled. Search for a free filter entry, i.e.
5146 * one without the enable bit set
5148 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5149 reg = rd32(E1000_VLVF(i));
5150 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5151 break;
5154 if (i < E1000_VLVF_ARRAY_SIZE) {
5155 /* Found an enabled/available entry */
5156 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5158 /* if !enabled we need to set this up in vfta */
5159 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5160 /* add VID to filter table */
5161 igb_vfta_set(hw, vid, true);
5162 reg |= E1000_VLVF_VLANID_ENABLE;
5164 reg &= ~E1000_VLVF_VLANID_MASK;
5165 reg |= vid;
5166 wr32(E1000_VLVF(i), reg);
5168 /* do not modify RLPML for PF devices */
5169 if (vf >= adapter->vfs_allocated_count)
5170 return 0;
5172 if (!adapter->vf_data[vf].vlans_enabled) {
5173 u32 size;
5174 reg = rd32(E1000_VMOLR(vf));
5175 size = reg & E1000_VMOLR_RLPML_MASK;
5176 size += 4;
5177 reg &= ~E1000_VMOLR_RLPML_MASK;
5178 reg |= size;
5179 wr32(E1000_VMOLR(vf), reg);
5182 adapter->vf_data[vf].vlans_enabled++;
5184 } else {
5185 if (i < E1000_VLVF_ARRAY_SIZE) {
5186 /* remove vf from the pool */
5187 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5188 /* if pool is empty then remove entry from vfta */
5189 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5190 reg = 0;
5191 igb_vfta_set(hw, vid, false);
5193 wr32(E1000_VLVF(i), reg);
5195 /* do not modify RLPML for PF devices */
5196 if (vf >= adapter->vfs_allocated_count)
5197 return 0;
5199 adapter->vf_data[vf].vlans_enabled--;
5200 if (!adapter->vf_data[vf].vlans_enabled) {
5201 u32 size;
5202 reg = rd32(E1000_VMOLR(vf));
5203 size = reg & E1000_VMOLR_RLPML_MASK;
5204 size -= 4;
5205 reg &= ~E1000_VMOLR_RLPML_MASK;
5206 reg |= size;
5207 wr32(E1000_VMOLR(vf), reg);
5211 return 0;
5214 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5216 struct e1000_hw *hw = &adapter->hw;
5218 if (vid)
5219 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5220 else
5221 wr32(E1000_VMVIR(vf), 0);
5224 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5225 int vf, u16 vlan, u8 qos)
5227 int err = 0;
5228 struct igb_adapter *adapter = netdev_priv(netdev);
5230 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5231 return -EINVAL;
5232 if (vlan || qos) {
5233 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5234 if (err)
5235 goto out;
5236 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5237 igb_set_vmolr(adapter, vf, !vlan);
5238 adapter->vf_data[vf].pf_vlan = vlan;
5239 adapter->vf_data[vf].pf_qos = qos;
5240 dev_info(&adapter->pdev->dev,
5241 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5242 if (test_bit(__IGB_DOWN, &adapter->state)) {
5243 dev_warn(&adapter->pdev->dev,
5244 "The VF VLAN has been set,"
5245 " but the PF device is not up.\n");
5246 dev_warn(&adapter->pdev->dev,
5247 "Bring the PF device up before"
5248 " attempting to use the VF device.\n");
5250 } else {
5251 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5252 false, vf);
5253 igb_set_vmvir(adapter, vlan, vf);
5254 igb_set_vmolr(adapter, vf, true);
5255 adapter->vf_data[vf].pf_vlan = 0;
5256 adapter->vf_data[vf].pf_qos = 0;
5258 out:
5259 return err;
5262 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5264 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5265 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5267 return igb_vlvf_set(adapter, vid, add, vf);
5270 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5272 /* clear flags - except flag that indicates PF has set the MAC */
5273 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5274 adapter->vf_data[vf].last_nack = jiffies;
5276 /* reset offloads to defaults */
5277 igb_set_vmolr(adapter, vf, true);
5279 /* reset vlans for device */
5280 igb_clear_vf_vfta(adapter, vf);
5281 if (adapter->vf_data[vf].pf_vlan)
5282 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5283 adapter->vf_data[vf].pf_vlan,
5284 adapter->vf_data[vf].pf_qos);
5285 else
5286 igb_clear_vf_vfta(adapter, vf);
5288 /* reset multicast table array for vf */
5289 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5291 /* Flush and reset the mta with the new values */
5292 igb_set_rx_mode(adapter->netdev);
5295 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5297 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5299 /* generate a new mac address as we were hotplug removed/added */
5300 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5301 random_ether_addr(vf_mac);
5303 /* process remaining reset events */
5304 igb_vf_reset(adapter, vf);
5307 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5309 struct e1000_hw *hw = &adapter->hw;
5310 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5311 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5312 u32 reg, msgbuf[3];
5313 u8 *addr = (u8 *)(&msgbuf[1]);
5315 /* process all the same items cleared in a function level reset */
5316 igb_vf_reset(adapter, vf);
5318 /* set vf mac address */
5319 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5321 /* enable transmit and receive for vf */
5322 reg = rd32(E1000_VFTE);
5323 wr32(E1000_VFTE, reg | (1 << vf));
5324 reg = rd32(E1000_VFRE);
5325 wr32(E1000_VFRE, reg | (1 << vf));
5327 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5329 /* reply to reset with ack and vf mac address */
5330 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5331 memcpy(addr, vf_mac, 6);
5332 igb_write_mbx(hw, msgbuf, 3, vf);
5335 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5338 * The VF MAC Address is stored in a packed array of bytes
5339 * starting at the second 32 bit word of the msg array
5341 unsigned char *addr = (unsigned char *)&msg[1];
5342 int err = -1;
5344 if (is_valid_ether_addr(addr))
5345 err = igb_set_vf_mac(adapter, vf, addr);
5347 return err;
5350 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5352 struct e1000_hw *hw = &adapter->hw;
5353 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5354 u32 msg = E1000_VT_MSGTYPE_NACK;
5356 /* if device isn't clear to send it shouldn't be reading either */
5357 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5358 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5359 igb_write_mbx(hw, &msg, 1, vf);
5360 vf_data->last_nack = jiffies;
5364 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5366 struct pci_dev *pdev = adapter->pdev;
5367 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5368 struct e1000_hw *hw = &adapter->hw;
5369 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5370 s32 retval;
5372 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5374 if (retval) {
5375 /* if receive failed revoke VF CTS stats and restart init */
5376 dev_err(&pdev->dev, "Error receiving message from VF\n");
5377 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5378 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5379 return;
5380 goto out;
5383 /* this is a message we already processed, do nothing */
5384 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5385 return;
5388 * until the vf completes a reset it should not be
5389 * allowed to start any configuration.
5392 if (msgbuf[0] == E1000_VF_RESET) {
5393 igb_vf_reset_msg(adapter, vf);
5394 return;
5397 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5398 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5399 return;
5400 retval = -1;
5401 goto out;
5404 switch ((msgbuf[0] & 0xFFFF)) {
5405 case E1000_VF_SET_MAC_ADDR:
5406 retval = -EINVAL;
5407 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5408 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5409 else
5410 dev_warn(&pdev->dev,
5411 "VF %d attempted to override administratively "
5412 "set MAC address\nReload the VF driver to "
5413 "resume operations\n", vf);
5414 break;
5415 case E1000_VF_SET_PROMISC:
5416 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5417 break;
5418 case E1000_VF_SET_MULTICAST:
5419 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5420 break;
5421 case E1000_VF_SET_LPE:
5422 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5423 break;
5424 case E1000_VF_SET_VLAN:
5425 retval = -1;
5426 if (vf_data->pf_vlan)
5427 dev_warn(&pdev->dev,
5428 "VF %d attempted to override administratively "
5429 "set VLAN tag\nReload the VF driver to "
5430 "resume operations\n", vf);
5431 else
5432 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5433 break;
5434 default:
5435 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5436 retval = -1;
5437 break;
5440 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5441 out:
5442 /* notify the VF of the results of what it sent us */
5443 if (retval)
5444 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5445 else
5446 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5448 igb_write_mbx(hw, msgbuf, 1, vf);
5451 static void igb_msg_task(struct igb_adapter *adapter)
5453 struct e1000_hw *hw = &adapter->hw;
5454 u32 vf;
5456 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5457 /* process any reset requests */
5458 if (!igb_check_for_rst(hw, vf))
5459 igb_vf_reset_event(adapter, vf);
5461 /* process any messages pending */
5462 if (!igb_check_for_msg(hw, vf))
5463 igb_rcv_msg_from_vf(adapter, vf);
5465 /* process any acks */
5466 if (!igb_check_for_ack(hw, vf))
5467 igb_rcv_ack_from_vf(adapter, vf);
5472 * igb_set_uta - Set unicast filter table address
5473 * @adapter: board private structure
5475 * The unicast table address is a register array of 32-bit registers.
5476 * The table is meant to be used in a way similar to how the MTA is used
5477 * however due to certain limitations in the hardware it is necessary to
5478 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5479 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5481 static void igb_set_uta(struct igb_adapter *adapter)
5483 struct e1000_hw *hw = &adapter->hw;
5484 int i;
5486 /* The UTA table only exists on 82576 hardware and newer */
5487 if (hw->mac.type < e1000_82576)
5488 return;
5490 /* we only need to do this if VMDq is enabled */
5491 if (!adapter->vfs_allocated_count)
5492 return;
5494 for (i = 0; i < hw->mac.uta_reg_count; i++)
5495 array_wr32(E1000_UTA, i, ~0);
5499 * igb_intr_msi - Interrupt Handler
5500 * @irq: interrupt number
5501 * @data: pointer to a network interface device structure
5503 static irqreturn_t igb_intr_msi(int irq, void *data)
5505 struct igb_adapter *adapter = data;
5506 struct igb_q_vector *q_vector = adapter->q_vector[0];
5507 struct e1000_hw *hw = &adapter->hw;
5508 /* read ICR disables interrupts using IAM */
5509 u32 icr = rd32(E1000_ICR);
5511 igb_write_itr(q_vector);
5513 if (icr & E1000_ICR_DRSTA)
5514 schedule_work(&adapter->reset_task);
5516 if (icr & E1000_ICR_DOUTSYNC) {
5517 /* HW is reporting DMA is out of sync */
5518 adapter->stats.doosync++;
5521 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5522 hw->mac.get_link_status = 1;
5523 if (!test_bit(__IGB_DOWN, &adapter->state))
5524 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5527 napi_schedule(&q_vector->napi);
5529 return IRQ_HANDLED;
5533 * igb_intr - Legacy Interrupt Handler
5534 * @irq: interrupt number
5535 * @data: pointer to a network interface device structure
5537 static irqreturn_t igb_intr(int irq, void *data)
5539 struct igb_adapter *adapter = data;
5540 struct igb_q_vector *q_vector = adapter->q_vector[0];
5541 struct e1000_hw *hw = &adapter->hw;
5542 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5543 * need for the IMC write */
5544 u32 icr = rd32(E1000_ICR);
5546 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5547 * not set, then the adapter didn't send an interrupt */
5548 if (!(icr & E1000_ICR_INT_ASSERTED))
5549 return IRQ_NONE;
5551 igb_write_itr(q_vector);
5553 if (icr & E1000_ICR_DRSTA)
5554 schedule_work(&adapter->reset_task);
5556 if (icr & E1000_ICR_DOUTSYNC) {
5557 /* HW is reporting DMA is out of sync */
5558 adapter->stats.doosync++;
5561 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5562 hw->mac.get_link_status = 1;
5563 /* guard against interrupt when we're going down */
5564 if (!test_bit(__IGB_DOWN, &adapter->state))
5565 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5568 napi_schedule(&q_vector->napi);
5570 return IRQ_HANDLED;
5573 void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5575 struct igb_adapter *adapter = q_vector->adapter;
5576 struct e1000_hw *hw = &adapter->hw;
5578 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5579 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5580 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5581 igb_set_itr(q_vector);
5582 else
5583 igb_update_ring_itr(q_vector);
5586 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5587 if (adapter->msix_entries)
5588 wr32(E1000_EIMS, q_vector->eims_value);
5589 else
5590 igb_irq_enable(adapter);
5595 * igb_poll - NAPI Rx polling callback
5596 * @napi: napi polling structure
5597 * @budget: count of how many packets we should handle
5599 static int igb_poll(struct napi_struct *napi, int budget)
5601 struct igb_q_vector *q_vector = container_of(napi,
5602 struct igb_q_vector,
5603 napi);
5604 bool clean_complete = true;
5606 #ifdef CONFIG_IGB_DCA
5607 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5608 igb_update_dca(q_vector);
5609 #endif
5610 if (q_vector->tx.ring)
5611 clean_complete = igb_clean_tx_irq(q_vector);
5613 if (q_vector->rx.ring)
5614 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5616 /* If all work not completed, return budget and keep polling */
5617 if (!clean_complete)
5618 return budget;
5620 /* If not enough Rx work done, exit the polling mode */
5621 napi_complete(napi);
5622 igb_ring_irq_enable(q_vector);
5624 return 0;
5628 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5629 * @adapter: board private structure
5630 * @shhwtstamps: timestamp structure to update
5631 * @regval: unsigned 64bit system time value.
5633 * We need to convert the system time value stored in the RX/TXSTMP registers
5634 * into a hwtstamp which can be used by the upper level timestamping functions
5636 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5637 struct skb_shared_hwtstamps *shhwtstamps,
5638 u64 regval)
5640 u64 ns;
5643 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up by
5644 * 24 bits to match the clock shift we set up earlier.
5646 if (adapter->hw.mac.type >= e1000_82580)
5647 regval <<= IGB_82580_TSYNC_SHIFT;
5649 ns = timecounter_cyc2time(&adapter->clock, regval);
5650 timecompare_update(&adapter->compare, ns);
5651 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5652 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5653 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
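/*
 * Illustrative sketch (editor addition): the normalisation above. The
 * 82580 counts 1ns per LSB while the cyclecounter was set up with a
 * 2^24 scale, so the raw register value is shifted to match; the shift
 * constant here is assumed to mirror IGB_82580_TSYNC_SHIFT (24).
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define TSYNC_SHIFT 24

int main(void)
{
	uint64_t regval = 1000;			/* raw: 1000ns */
	uint64_t cycles = regval << TSYNC_SHIFT;

	/* prints: raw=1000 cycles=16777216000 */
	printf("raw=%llu cycles=%llu\n",
	       (unsigned long long)regval, (unsigned long long)cycles);
	return 0;
}
#endif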
5657 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5658 * @q_vector: pointer to q_vector containing needed info
5659 * @buffer: pointer to igb_tx_buffer structure
5661 * If we were asked to do hardware stamping and such a time stamp is
5663 * available, then it must have been for this skb here because we
5663 * allow only one such packet into the queue.
5665 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5666 struct igb_tx_buffer *buffer_info)
5668 struct igb_adapter *adapter = q_vector->adapter;
5669 struct e1000_hw *hw = &adapter->hw;
5670 struct skb_shared_hwtstamps shhwtstamps;
5671 u64 regval;
5673 /* if the skb was not hw timestamped or the TX stamp is not valid, exit */
5674 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5675 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5676 return;
5678 regval = rd32(E1000_TXSTMPL);
5679 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5681 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5682 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5686 * igb_clean_tx_irq - Reclaim resources after transmit completes
5687 * @q_vector: pointer to q_vector containing needed info
5688 * returns true if ring is completely cleaned
5690 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5692 struct igb_adapter *adapter = q_vector->adapter;
5693 struct igb_ring *tx_ring = q_vector->tx.ring;
5694 struct igb_tx_buffer *tx_buffer;
5695 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5696 unsigned int total_bytes = 0, total_packets = 0;
5697 unsigned int budget = q_vector->tx.work_limit;
5698 unsigned int i = tx_ring->next_to_clean;
5700 if (test_bit(__IGB_DOWN, &adapter->state))
5701 return true;
5703 tx_buffer = &tx_ring->tx_buffer_info[i];
5704 tx_desc = IGB_TX_DESC(tx_ring, i);
5705 i -= tx_ring->count;
5707 for (; budget; budget--) {
5708 eop_desc = tx_buffer->next_to_watch;
5710 /* prevent any other reads prior to eop_desc */
5711 rmb();
5713 /* if next_to_watch is not set then there is no work pending */
5714 if (!eop_desc)
5715 break;
5717 /* if DD is not set pending work has not been completed */
5718 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5719 break;
5721 /* clear next_to_watch to prevent false hangs */
5722 tx_buffer->next_to_watch = NULL;
5724 /* update the statistics for this packet */
5725 total_bytes += tx_buffer->bytecount;
5726 total_packets += tx_buffer->gso_segs;
5728 /* retrieve hardware timestamp */
5729 igb_tx_hwtstamp(q_vector, tx_buffer);
5731 /* free the skb */
5732 dev_kfree_skb_any(tx_buffer->skb);
5733 tx_buffer->skb = NULL;
5735 /* unmap skb header data */
5736 dma_unmap_single(tx_ring->dev,
5737 tx_buffer->dma,
5738 tx_buffer->length,
5739 DMA_TO_DEVICE);
5741 /* clear last DMA location and unmap remaining buffers */
5742 while (tx_desc != eop_desc) {
5743 tx_buffer->dma = 0;
5745 tx_buffer++;
5746 tx_desc++;
5747 i++;
5748 if (unlikely(!i)) {
5749 i -= tx_ring->count;
5750 tx_buffer = tx_ring->tx_buffer_info;
5751 tx_desc = IGB_TX_DESC(tx_ring, 0);
5754 /* unmap any remaining paged data */
5755 if (tx_buffer->dma) {
5756 dma_unmap_page(tx_ring->dev,
5757 tx_buffer->dma,
5758 tx_buffer->length,
5759 DMA_TO_DEVICE);
5763 /* clear last DMA location */
5764 tx_buffer->dma = 0;
5766 /* move us one more past the eop_desc for start of next pkt */
5767 tx_buffer++;
5768 tx_desc++;
5769 i++;
5770 if (unlikely(!i)) {
5771 i -= tx_ring->count;
5772 tx_buffer = tx_ring->tx_buffer_info;
5773 tx_desc = IGB_TX_DESC(tx_ring, 0);
5777 i += tx_ring->count;
5778 tx_ring->next_to_clean = i;
5779 u64_stats_update_begin(&tx_ring->tx_syncp);
5780 tx_ring->tx_stats.bytes += total_bytes;
5781 tx_ring->tx_stats.packets += total_packets;
5782 u64_stats_update_end(&tx_ring->tx_syncp);
5783 q_vector->tx.total_bytes += total_bytes;
5784 q_vector->tx.total_packets += total_packets;
5786 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5787 struct e1000_hw *hw = &adapter->hw;
5789 eop_desc = tx_buffer->next_to_watch;
5791 /* Detect a transmit hang in hardware; this serializes the
5792 * check with the clearing of time_stamp and the movement of i */
5793 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5794 if (eop_desc &&
5795 time_after(jiffies, tx_buffer->time_stamp +
5796 (adapter->tx_timeout_factor * HZ)) &&
5797 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5799 /* detected Tx unit hang */
5800 dev_err(tx_ring->dev,
5801 "Detected Tx Unit Hang\n"
5802 " Tx Queue <%d>\n"
5803 " TDH <%x>\n"
5804 " TDT <%x>\n"
5805 " next_to_use <%x>\n"
5806 " next_to_clean <%x>\n"
5807 "buffer_info[next_to_clean]\n"
5808 " time_stamp <%lx>\n"
5809 " next_to_watch <%p>\n"
5810 " jiffies <%lx>\n"
5811 " desc.status <%x>\n",
5812 tx_ring->queue_index,
5813 rd32(E1000_TDH(tx_ring->reg_idx)),
5814 readl(tx_ring->tail),
5815 tx_ring->next_to_use,
5816 tx_ring->next_to_clean,
5817 tx_buffer->time_stamp,
5818 eop_desc,
5819 jiffies,
5820 eop_desc->wb.status);
5821 netif_stop_subqueue(tx_ring->netdev,
5822 tx_ring->queue_index);
5824 /* we are about to reset, no point in enabling stuff */
5825 return true;
5829 if (unlikely(total_packets &&
5830 netif_carrier_ok(tx_ring->netdev) &&
5831 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5832 /* Make sure that anybody stopping the queue after this
5833 * sees the new next_to_clean.
5835 smp_mb();
5836 if (__netif_subqueue_stopped(tx_ring->netdev,
5837 tx_ring->queue_index) &&
5838 !(test_bit(__IGB_DOWN, &adapter->state))) {
5839 netif_wake_subqueue(tx_ring->netdev,
5840 tx_ring->queue_index);
5842 u64_stats_update_begin(&tx_ring->tx_syncp);
5843 tx_ring->tx_stats.restart_queue++;
5844 u64_stats_update_end(&tx_ring->tx_syncp);
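/* a non-zero remaining budget means the ring was fully cleaned */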
5848 return !!budget;
5851 static inline void igb_rx_checksum(struct igb_ring *ring,
5852 union e1000_adv_rx_desc *rx_desc,
5853 struct sk_buff *skb)
5855 skb_checksum_none_assert(skb);
5857 /* Ignore Checksum bit is set */
5858 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5859 return;
5861 /* Rx checksum disabled via ethtool */
5862 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5863 return;
5865 /* TCP/UDP checksum error bit is set */
5866 if (igb_test_staterr(rx_desc,
5867 E1000_RXDEXT_STATERR_TCPE |
5868 E1000_RXDEXT_STATERR_IPE)) {
5870 * work around an errata with SCTP packets where the TCPE (aka
5871 * L4E) bit is set incorrectly on 64-byte (60 bytes w/o CRC)
5872 * packets; just let the stack verify the crc32c
5874 if (!((skb->len == 60) &&
5875 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5876 u64_stats_update_begin(&ring->rx_syncp);
5877 ring->rx_stats.csum_err++;
5878 u64_stats_update_end(&ring->rx_syncp);
5880 /* let the stack verify checksum errors */
5881 return;
5883 /* It must be a TCP or UDP packet with a valid checksum */
5884 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5885 E1000_RXD_STAT_UDPCS))
5886 skb->ip_summed = CHECKSUM_UNNECESSARY;
5888 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5889 le32_to_cpu(rx_desc->wb.upper.status_error));
5892 static inline void igb_rx_hash(struct igb_ring *ring,
5893 union e1000_adv_rx_desc *rx_desc,
5894 struct sk_buff *skb)
5896 if (ring->netdev->features & NETIF_F_RXHASH)
5897 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5900 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5901 union e1000_adv_rx_desc *rx_desc,
5902 struct sk_buff *skb)
5904 struct igb_adapter *adapter = q_vector->adapter;
5905 struct e1000_hw *hw = &adapter->hw;
5906 u64 regval;
5908 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5909 E1000_RXDADV_STAT_TS))
5910 return;
5913 * If this bit is set, then the RX registers contain the time stamp. No
5914 * other packet will be time stamped until we read these registers, so
5915 * read the registers to make them available again. Because only one
5916 * packet can be time stamped at a time, we know that the register
5917 * values must belong to this one here and therefore we don't need to
5918 * compare any of the additional attributes stored for it.
5920 * If nothing went wrong, the registers give us a timestamp value
5921 * that we can turn into a skb_shared_hwtstamps.
5923 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5924 u32 *stamp = (u32 *)skb->data;
5925 regval = le32_to_cpu(*(stamp + 2));
5926 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5927 skb_pull(skb, IGB_TS_HDR_LEN);
5928 } else {
5929 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5930 return;
5932 regval = rd32(E1000_RXSTMPL);
5933 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5936 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5939 static void igb_rx_vlan(struct igb_ring *ring,
5940 union e1000_adv_rx_desc *rx_desc,
5941 struct sk_buff *skb)
5943 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5944 u16 vid;
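/* some MACs write looped-back (VM to VM) frames back with the VLAN
 * tag still in network byte order, which the ring flags via
 * IGB_RING_FLAG_RX_LB_VLAN_BSWAP; swap those back here */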
5945 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
5946 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
5947 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
5948 else
5949 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5951 __vlan_hwaccel_put_tag(skb, vid);
5955 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5957 /* HW will not DMA in data larger than the given buffer, even if it
5958 * parses the (NFS, of course) header to be larger. In that case, it
5959 * fills the header buffer and spills the rest into the page.
5961 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5962 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5963 if (hlen > IGB_RX_HDR_LEN)
5964 hlen = IGB_RX_HDR_LEN;
5965 return hlen;
5968 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
5970 struct igb_ring *rx_ring = q_vector->rx.ring;
5971 union e1000_adv_rx_desc *rx_desc;
5972 const int current_node = numa_node_id();
5973 unsigned int total_bytes = 0, total_packets = 0;
5974 u16 cleaned_count = igb_desc_unused(rx_ring);
5975 u16 i = rx_ring->next_to_clean;
5977 rx_desc = IGB_RX_DESC(rx_ring, i);
5979 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
5980 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
5981 struct sk_buff *skb = buffer_info->skb;
5982 union e1000_adv_rx_desc *next_rxd;
5984 buffer_info->skb = NULL;
5985 prefetch(skb->data);
5987 i++;
5988 if (i == rx_ring->count)
5989 i = 0;
5991 next_rxd = IGB_RX_DESC(rx_ring, i);
5992 prefetch(next_rxd);
5995 * This memory barrier is needed to keep us from reading
5996 * any other fields out of the rx_desc until we know the
5997 * RXD_STAT_DD bit is set
5999 rmb();
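/* a still-linear skb means this is the first descriptor of the
 * packet: claim the hw-split header bytes and release the header
 * buffer's DMA mapping */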
6001 if (!skb_is_nonlinear(skb)) {
6002 __skb_put(skb, igb_get_hlen(rx_desc));
6003 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6004 IGB_RX_HDR_LEN,
6005 DMA_FROM_DEVICE);
6006 buffer_info->dma = 0;
6009 if (rx_desc->wb.upper.length) {
6010 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6012 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6013 buffer_info->page,
6014 buffer_info->page_offset,
6015 length);
6017 skb->len += length;
6018 skb->data_len += length;
6019 skb->truesize += PAGE_SIZE / 2;
6021 if ((page_count(buffer_info->page) != 1) ||
6022 (page_to_nid(buffer_info->page) != current_node))
6023 buffer_info->page = NULL;
6024 else
6025 get_page(buffer_info->page);
6027 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6028 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6029 buffer_info->page_dma = 0;
6032 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6033 struct igb_rx_buffer *next_buffer;
6034 next_buffer = &rx_ring->rx_buffer_info[i];
6035 buffer_info->skb = next_buffer->skb;
6036 buffer_info->dma = next_buffer->dma;
6037 next_buffer->skb = skb;
6038 next_buffer->dma = 0;
6039 goto next_desc;
6042 if (igb_test_staterr(rx_desc,
6043 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6044 dev_kfree_skb_any(skb);
6045 goto next_desc;
6048 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6049 igb_rx_hash(rx_ring, rx_desc, skb);
6050 igb_rx_checksum(rx_ring, rx_desc, skb);
6051 igb_rx_vlan(rx_ring, rx_desc, skb);
6053 total_bytes += skb->len;
6054 total_packets++;
6056 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6058 napi_gro_receive(&q_vector->napi, skb);
6060 budget--;
6061 next_desc:
6062 if (!budget)
6063 break;
6065 cleaned_count++;
6066 /* return some buffers to hardware, one at a time is too slow */
6067 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6068 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6069 cleaned_count = 0;
6072 /* use prefetched values */
6073 rx_desc = next_rxd;
6076 rx_ring->next_to_clean = i;
6077 u64_stats_update_begin(&rx_ring->rx_syncp);
6078 rx_ring->rx_stats.packets += total_packets;
6079 rx_ring->rx_stats.bytes += total_bytes;
6080 u64_stats_update_end(&rx_ring->rx_syncp);
6081 q_vector->rx.total_packets += total_packets;
6082 q_vector->rx.total_bytes += total_bytes;
6084 if (cleaned_count)
6085 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6087 return !!budget;
6090 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6091 struct igb_rx_buffer *bi)
6093 struct sk_buff *skb = bi->skb;
6094 dma_addr_t dma = bi->dma;
6096 if (dma)
6097 return true;
6099 if (likely(!skb)) {
6100 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6101 IGB_RX_HDR_LEN);
6102 bi->skb = skb;
6103 if (!skb) {
6104 rx_ring->rx_stats.alloc_failed++;
6105 return false;
6108 /* initialize skb for ring */
6109 skb_record_rx_queue(skb, rx_ring->queue_index);
6112 dma = dma_map_single(rx_ring->dev, skb->data,
6113 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6115 if (dma_mapping_error(rx_ring->dev, dma)) {
6116 rx_ring->rx_stats.alloc_failed++;
6117 return false;
6120 bi->dma = dma;
6121 return true;
6124 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6125 struct igb_rx_buffer *bi)
6127 struct page *page = bi->page;
6128 dma_addr_t page_dma = bi->page_dma;
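/* each page is consumed one half at a time; XOR-ing the offset
 * with PAGE_SIZE / 2 flips between the two halves */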
6129 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6131 if (page_dma)
6132 return true;
6134 if (!page) {
6135 page = netdev_alloc_page(rx_ring->netdev);
6136 bi->page = page;
6137 if (unlikely(!page)) {
6138 rx_ring->rx_stats.alloc_failed++;
6139 return false;
6143 page_dma = dma_map_page(rx_ring->dev, page,
6144 page_offset, PAGE_SIZE / 2,
6145 DMA_FROM_DEVICE);
6147 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6148 rx_ring->rx_stats.alloc_failed++;
6149 return false;
6152 bi->page_dma = page_dma;
6153 bi->page_offset = page_offset;
6154 return true;
6158 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6159 * @rx_ring: pointer to the ring to place buffers on
 * @cleaned_count: number of buffers to replace
6161 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6163 union e1000_adv_rx_desc *rx_desc;
6164 struct igb_rx_buffer *bi;
6165 u16 i = rx_ring->next_to_use;
6167 rx_desc = IGB_RX_DESC(rx_ring, i);
6168 bi = &rx_ring->rx_buffer_info[i];
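/* same trick as the clean routines: bias i by -count so the wrap
 * check below is a simple !i test */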
6169 i -= rx_ring->count;
6171 while (cleaned_count--) {
6172 if (!igb_alloc_mapped_skb(rx_ring, bi))
6173 break;
6175 /* Refresh the desc even if buffer_addrs didn't change
6176 * because each write-back erases this info. */
6177 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6179 if (!igb_alloc_mapped_page(rx_ring, bi))
6180 break;
6182 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6184 rx_desc++;
6185 bi++;
6186 i++;
6187 if (unlikely(!i)) {
6188 rx_desc = IGB_RX_DESC(rx_ring, 0);
6189 bi = rx_ring->rx_buffer_info;
6190 i -= rx_ring->count;
6193 /* clear the hdr_addr for the next_to_use descriptor */
6194 rx_desc->read.hdr_addr = 0;
6197 i += rx_ring->count;
6199 if (rx_ring->next_to_use != i) {
6200 rx_ring->next_to_use = i;
6202 /* Force memory writes to complete before letting h/w
6203 * know there are new descriptors to fetch. (Only
6204 * applicable for weak-ordered memory model archs,
6205 * such as IA-64). */
6206 wmb();
6207 writel(i, rx_ring->tail);
6212 * igb_mii_ioctl - handle MII ioctl requests
6213 * @netdev: network interface device structure
6214 * @ifr: pointer to the interface request structure
6215 * @cmd: ioctl command
6217 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6219 struct igb_adapter *adapter = netdev_priv(netdev);
6220 struct mii_ioctl_data *data = if_mii(ifr);
6222 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6223 return -EOPNOTSUPP;
6225 switch (cmd) {
6226 case SIOCGMIIPHY:
6227 data->phy_id = adapter->hw.phy.addr;
6228 break;
6229 case SIOCGMIIREG:
6230 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6231 &data->val_out))
6232 return -EIO;
6233 break;
6234 case SIOCSMIIREG:
6235 default:
6236 return -EOPNOTSUPP;
6238 return 0;
6242 * igb_hwtstamp_ioctl - control hardware time stamping
6243 * @netdev: network interface device structure
6244 * @ifr: pointer to the interface request structure
6245 * @cmd: ioctl command (SIOCSHWTSTAMP)
6247 * Outgoing time stamping can be enabled and disabled. Play nice and
6248 * disable it when requested, although it shouldn't cause any overhead
6249 * when no packet needs it. At most one packet in the queue may be
6250 * marked for time stamping, otherwise it would be impossible to tell
6251 * for sure to which packet the hardware time stamp belongs.
6253 * Incoming time stamping has to be configured via the hardware
6254 * filters. Not all combinations are supported, in particular event
6255 * type has to be specified. Matching the kind of event packet is
6256 * not supported, with the exception of "all V2 events regardless of
6257 * layer 2 or 4".
6260 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6261 struct ifreq *ifr, int cmd)
6263 struct igb_adapter *adapter = netdev_priv(netdev);
6264 struct e1000_hw *hw = &adapter->hw;
6265 struct hwtstamp_config config;
6266 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6267 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6268 u32 tsync_rx_cfg = 0;
6269 bool is_l4 = false;
6270 bool is_l2 = false;
6271 u32 regval;
6273 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6274 return -EFAULT;
6276 /* reserved for future extensions */
6277 if (config.flags)
6278 return -EINVAL;
6280 switch (config.tx_type) {
6281 case HWTSTAMP_TX_OFF:
6282 tsync_tx_ctl = 0;
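/* fall through - OFF just clears the enable bit */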
6283 case HWTSTAMP_TX_ON:
6284 break;
6285 default:
6286 return -ERANGE;
6289 switch (config.rx_filter) {
6290 case HWTSTAMP_FILTER_NONE:
6291 tsync_rx_ctl = 0;
6292 break;
6293 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6294 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6295 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6296 case HWTSTAMP_FILTER_ALL:
6298 * register TSYNCRXCFG must be set, therefore it is not
6299 * possible to time stamp both Sync and Delay_Req messages
6300 * => fall back to time stamping all packets
6302 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6303 config.rx_filter = HWTSTAMP_FILTER_ALL;
6304 break;
6305 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6306 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6307 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6308 is_l4 = true;
6309 break;
6310 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6311 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6312 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6313 is_l4 = true;
6314 break;
6315 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6316 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6317 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6318 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6319 is_l2 = true;
6320 is_l4 = true;
6321 config.rx_filter = HWTSTAMP_FILTER_SOME;
6322 break;
6323 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6324 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6325 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6326 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6327 is_l2 = true;
6328 is_l4 = true;
6329 config.rx_filter = HWTSTAMP_FILTER_SOME;
6330 break;
6331 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6332 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6333 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6334 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6335 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6336 is_l2 = true;
6337 is_l4 = true;
6338 break;
6339 default:
6340 return -ERANGE;
6343 if (hw->mac.type == e1000_82575) {
6344 if (tsync_rx_ctl || tsync_tx_ctl)
6345 return -EINVAL;
6346 return 0;
6350 * Per-packet timestamping only works if all packets are
6351 * timestamped, so enable timestamping of all packets as
6352 * long as one rx filter was configured.
6354 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6355 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6356 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6359 /* enable/disable TX */
6360 regval = rd32(E1000_TSYNCTXCTL);
6361 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6362 regval |= tsync_tx_ctl;
6363 wr32(E1000_TSYNCTXCTL, regval);
6365 /* enable/disable RX */
6366 regval = rd32(E1000_TSYNCRXCTL);
6367 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6368 regval |= tsync_rx_ctl;
6369 wr32(E1000_TSYNCRXCTL, regval);
6371 /* define which PTP packets are time stamped */
6372 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6374 /* define ethertype filter for timestamped packets */
6375 if (is_l2)
6376 wr32(E1000_ETQF(3),
6377 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6378 E1000_ETQF_1588 | /* enable timestamping */
6379 ETH_P_1588)); /* 1588 eth protocol type */
6380 else
6381 wr32(E1000_ETQF(3), 0);
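/* UDP port 319 carries PTP event messages (Sync/Delay_Req), the only
 * ones that need hw timestamps; PTP general messages use port 320 */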
6383 #define PTP_PORT 319
6384 /* L4 Queue Filter[3]: filter by destination port and protocol */
6385 if (is_l4) {
6386 u32 ftqf = (IPPROTO_UDP /* UDP */
6387 | E1000_FTQF_VF_BP /* VF not compared */
6388 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6389 | E1000_FTQF_MASK); /* mask all inputs */
6390 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6392 wr32(E1000_IMIR(3), htons(PTP_PORT));
6393 wr32(E1000_IMIREXT(3),
6394 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6395 if (hw->mac.type == e1000_82576) {
6396 /* enable source port check */
6397 wr32(E1000_SPQF(3), htons(PTP_PORT));
6398 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6400 wr32(E1000_FTQF(3), ftqf);
6401 } else {
6402 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6404 wrfl();
6406 adapter->hwtstamp_config = config;
6408 /* clear TX/RX time stamp registers, just to be sure */
6409 regval = rd32(E1000_TXSTMPH);
6410 regval = rd32(E1000_RXSTMPH);
6412 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6413 -EFAULT : 0;
6417 * igb_ioctl - handle device-specific ioctl requests
6418 * @netdev: network interface device structure
6419 * @ifr: pointer to the interface request structure
6420 * @cmd: ioctl command
6422 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6424 switch (cmd) {
6425 case SIOCGMIIPHY:
6426 case SIOCGMIIREG:
6427 case SIOCSMIIREG:
6428 return igb_mii_ioctl(netdev, ifr, cmd);
6429 case SIOCSHWTSTAMP:
6430 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6431 default:
6432 return -EOPNOTSUPP;
6436 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6438 struct igb_adapter *adapter = hw->back;
6439 u16 cap_offset;
6441 cap_offset = adapter->pdev->pcie_cap;
6442 if (!cap_offset)
6443 return -E1000_ERR_CONFIG;
6445 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6447 return 0;
6450 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6452 struct igb_adapter *adapter = hw->back;
6453 u16 cap_offset;
6455 cap_offset = adapter->pdev->pcie_cap;
6456 if (!cap_offset)
6457 return -E1000_ERR_CONFIG;
6459 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6461 return 0;
6464 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6466 struct igb_adapter *adapter = netdev_priv(netdev);
6467 struct e1000_hw *hw = &adapter->hw;
6468 u32 ctrl, rctl;
6469 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6471 if (enable) {
6472 /* enable VLAN tag insert/strip */
6473 ctrl = rd32(E1000_CTRL);
6474 ctrl |= E1000_CTRL_VME;
6475 wr32(E1000_CTRL, ctrl);
6477 /* Disable CFI check */
6478 rctl = rd32(E1000_RCTL);
6479 rctl &= ~E1000_RCTL_CFIEN;
6480 wr32(E1000_RCTL, rctl);
6481 } else {
6482 /* disable VLAN tag insert/strip */
6483 ctrl = rd32(E1000_CTRL);
6484 ctrl &= ~E1000_CTRL_VME;
6485 wr32(E1000_CTRL, ctrl);
6488 igb_rlpml_set(adapter);
6491 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6493 struct igb_adapter *adapter = netdev_priv(netdev);
6494 struct e1000_hw *hw = &adapter->hw;
6495 int pf_id = adapter->vfs_allocated_count;
6497 /* attempt to add filter to vlvf array */
6498 igb_vlvf_set(adapter, vid, true, pf_id);
6500 /* add the filter since PF can receive vlans w/o entry in vlvf */
6501 igb_vfta_set(hw, vid, true);
6503 set_bit(vid, adapter->active_vlans);
6506 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6508 struct igb_adapter *adapter = netdev_priv(netdev);
6509 struct e1000_hw *hw = &adapter->hw;
6510 int pf_id = adapter->vfs_allocated_count;
6511 s32 err;
6513 /* remove vlan from VLVF table array */
6514 err = igb_vlvf_set(adapter, vid, false, pf_id);
6516 /* if vid was not present in VLVF just remove it from table */
6517 if (err)
6518 igb_vfta_set(hw, vid, false);
6520 clear_bit(vid, adapter->active_vlans);
6523 static void igb_restore_vlan(struct igb_adapter *adapter)
6525 u16 vid;
6527 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6529 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6530 igb_vlan_rx_add_vid(adapter->netdev, vid);
6533 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6535 struct pci_dev *pdev = adapter->pdev;
6536 struct e1000_mac_info *mac = &adapter->hw.mac;
6538 mac->autoneg = 0;
6540 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6541 * for the switch() below to work */
6542 if ((spd & 1) || (dplx & ~1))
6543 goto err_inval;
6545 /* Fiber NICs only allow 1000 Mbps Full duplex */
6546 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6547 (spd != SPEED_1000 || dplx != DUPLEX_FULL))
6549 goto err_inval;
6551 switch (spd + dplx) {
6552 case SPEED_10 + DUPLEX_HALF:
6553 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6554 break;
6555 case SPEED_10 + DUPLEX_FULL:
6556 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6557 break;
6558 case SPEED_100 + DUPLEX_HALF:
6559 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6560 break;
6561 case SPEED_100 + DUPLEX_FULL:
6562 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6563 break;
6564 case SPEED_1000 + DUPLEX_FULL:
6565 mac->autoneg = 1;
6566 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6567 break;
6568 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6569 default:
6570 goto err_inval;
6572 return 0;
6574 err_inval:
6575 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6576 return -EINVAL;
6579 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6581 struct net_device *netdev = pci_get_drvdata(pdev);
6582 struct igb_adapter *adapter = netdev_priv(netdev);
6583 struct e1000_hw *hw = &adapter->hw;
6584 u32 ctrl, rctl, status;
6585 u32 wufc = adapter->wol;
6586 #ifdef CONFIG_PM
6587 int retval = 0;
6588 #endif
6590 netif_device_detach(netdev);
6592 if (netif_running(netdev))
6593 igb_close(netdev);
6595 igb_clear_interrupt_scheme(adapter);
6597 #ifdef CONFIG_PM
6598 retval = pci_save_state(pdev);
6599 if (retval)
6600 return retval;
6601 #endif
6603 status = rd32(E1000_STATUS);
6604 if (status & E1000_STATUS_LU)
6605 wufc &= ~E1000_WUFC_LNKC;
6607 if (wufc) {
6608 igb_setup_rctl(adapter);
6609 igb_set_rx_mode(netdev);
6611 /* turn on all-multi mode if wake on multicast is enabled */
6612 if (wufc & E1000_WUFC_MC) {
6613 rctl = rd32(E1000_RCTL);
6614 rctl |= E1000_RCTL_MPE;
6615 wr32(E1000_RCTL, rctl);
6618 ctrl = rd32(E1000_CTRL);
6619 /* advertise wake from D3Cold */
6620 #define E1000_CTRL_ADVD3WUC 0x00100000
6621 /* phy power management enable */
6622 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6623 ctrl |= E1000_CTRL_ADVD3WUC;
6624 wr32(E1000_CTRL, ctrl);
6626 /* Allow time for pending master requests to run */
6627 igb_disable_pcie_master(hw);
6629 wr32(E1000_WUC, E1000_WUC_PME_EN);
6630 wr32(E1000_WUFC, wufc);
6631 } else {
6632 wr32(E1000_WUC, 0);
6633 wr32(E1000_WUFC, 0);
6636 *enable_wake = wufc || adapter->en_mng_pt;
6637 if (!*enable_wake)
6638 igb_power_down_link(adapter);
6639 else
6640 igb_power_up_link(adapter);
6642 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6643 * would have already happened in close and is redundant. */
6644 igb_release_hw_control(adapter);
6646 pci_disable_device(pdev);
6648 return 0;
6651 #ifdef CONFIG_PM
6652 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6654 int retval;
6655 bool wake;
6657 retval = __igb_shutdown(pdev, &wake);
6658 if (retval)
6659 return retval;
6661 if (wake) {
6662 pci_prepare_to_sleep(pdev);
6663 } else {
6664 pci_wake_from_d3(pdev, false);
6665 pci_set_power_state(pdev, PCI_D3hot);
6668 return 0;
6671 static int igb_resume(struct pci_dev *pdev)
6673 struct net_device *netdev = pci_get_drvdata(pdev);
6674 struct igb_adapter *adapter = netdev_priv(netdev);
6675 struct e1000_hw *hw = &adapter->hw;
6676 u32 err;
6678 pci_set_power_state(pdev, PCI_D0);
6679 pci_restore_state(pdev);
6680 pci_save_state(pdev);
6682 err = pci_enable_device_mem(pdev);
6683 if (err) {
6684 dev_err(&pdev->dev,
6685 "igb: Cannot enable PCI device from suspend\n");
6686 return err;
6688 pci_set_master(pdev);
6690 pci_enable_wake(pdev, PCI_D3hot, 0);
6691 pci_enable_wake(pdev, PCI_D3cold, 0);
6693 if (igb_init_interrupt_scheme(adapter)) {
6694 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6695 return -ENOMEM;
6698 igb_reset(adapter);
6700 /* let the f/w know that the h/w is now under the control of the
6701 * driver. */
6702 igb_get_hw_control(adapter);
6704 wr32(E1000_WUS, ~0);
6706 if (netif_running(netdev)) {
6707 err = igb_open(netdev);
6708 if (err)
6709 return err;
6712 netif_device_attach(netdev);
6714 return 0;
6716 #endif
6718 static void igb_shutdown(struct pci_dev *pdev)
6720 bool wake;
6722 __igb_shutdown(pdev, &wake);
6724 if (system_state == SYSTEM_POWER_OFF) {
6725 pci_wake_from_d3(pdev, wake);
6726 pci_set_power_state(pdev, PCI_D3hot);
6730 #ifdef CONFIG_NET_POLL_CONTROLLER
6732 * Polling 'interrupt' - used by things like netconsole to send skbs
6733 * without having to re-enable interrupts. It's not called while
6734 * the interrupt routine is executing.
6736 static void igb_netpoll(struct net_device *netdev)
6738 struct igb_adapter *adapter = netdev_priv(netdev);
6739 struct e1000_hw *hw = &adapter->hw;
6740 struct igb_q_vector *q_vector;
6741 int i;
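/* kick every vector's NAPI; with MSI-X mask only that vector's
 * interrupts, otherwise mask the device's single shared interrupt */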
6743 for (i = 0; i < adapter->num_q_vectors; i++) {
6744 q_vector = adapter->q_vector[i];
6745 if (adapter->msix_entries)
6746 wr32(E1000_EIMC, q_vector->eims_value);
6747 else
6748 igb_irq_disable(adapter);
6749 napi_schedule(&q_vector->napi);
6752 #endif /* CONFIG_NET_POLL_CONTROLLER */
6755 * igb_io_error_detected - called when PCI error is detected
6756 * @pdev: Pointer to PCI device
6757 * @state: The current pci connection state
6759 * This function is called after a PCI bus error affecting
6760 * this device has been detected.
6762 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6763 pci_channel_state_t state)
6765 struct net_device *netdev = pci_get_drvdata(pdev);
6766 struct igb_adapter *adapter = netdev_priv(netdev);
6768 netif_device_detach(netdev);
6770 if (state == pci_channel_io_perm_failure)
6771 return PCI_ERS_RESULT_DISCONNECT;
6773 if (netif_running(netdev))
6774 igb_down(adapter);
6775 pci_disable_device(pdev);
6777 /* Request a slot reset. */
6778 return PCI_ERS_RESULT_NEED_RESET;
6782 * igb_io_slot_reset - called after the pci bus has been reset.
6783 * @pdev: Pointer to PCI device
6785 * Restart the card from scratch, as if from a cold-boot. Implementation
6786 * resembles the first-half of the igb_resume routine.
6788 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6790 struct net_device *netdev = pci_get_drvdata(pdev);
6791 struct igb_adapter *adapter = netdev_priv(netdev);
6792 struct e1000_hw *hw = &adapter->hw;
6793 pci_ers_result_t result;
6794 int err;
6796 if (pci_enable_device_mem(pdev)) {
6797 dev_err(&pdev->dev,
6798 "Cannot re-enable PCI device after reset.\n");
6799 result = PCI_ERS_RESULT_DISCONNECT;
6800 } else {
6801 pci_set_master(pdev);
6802 pci_restore_state(pdev);
6803 pci_save_state(pdev);
6805 pci_enable_wake(pdev, PCI_D3hot, 0);
6806 pci_enable_wake(pdev, PCI_D3cold, 0);
6808 igb_reset(adapter);
6809 wr32(E1000_WUS, ~0);
6810 result = PCI_ERS_RESULT_RECOVERED;
6813 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6814 if (err) {
6815 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6816 "failed 0x%0x\n", err);
6817 /* non-fatal, continue */
6820 return result;
6824 * igb_io_resume - called when traffic can start flowing again.
6825 * @pdev: Pointer to PCI device
6827 * This callback is called when the error recovery driver tells us that
6828 * it's OK to resume normal operation. Implementation resembles the
6829 * second-half of the igb_resume routine.
6831 static void igb_io_resume(struct pci_dev *pdev)
6833 struct net_device *netdev = pci_get_drvdata(pdev);
6834 struct igb_adapter *adapter = netdev_priv(netdev);
6836 if (netif_running(netdev)) {
6837 if (igb_up(adapter)) {
6838 dev_err(&pdev->dev, "igb_up failed after reset\n");
6839 return;
6843 netif_device_attach(netdev);
6845 /* let the f/w know that the h/w is now under the control of the
6846 * driver. */
6847 igb_get_hw_control(adapter);
6850 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6851 u8 qsel)
6853 u32 rar_low, rar_high;
6854 struct e1000_hw *hw = &adapter->hw;
6856 /* HW expects these in little endian so we reverse the byte order
6857 * from network order (big endian) to little endian
6859 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6860 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6861 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6863 /* Indicate to hardware the Address is Valid. */
6864 rar_high |= E1000_RAH_AV;
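/* the multiply below writes qsel as a binary value into the RAH pool
 * field used by the 82575, while the shift sets one bit of the
 * one-hot pool bitmask used by later MACs */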
6866 if (hw->mac.type == e1000_82575)
6867 rar_high |= E1000_RAH_POOL_1 * qsel;
6868 else
6869 rar_high |= E1000_RAH_POOL_1 << qsel;
6871 wr32(E1000_RAL(index), rar_low);
6872 wrfl();
6873 wr32(E1000_RAH(index), rar_high);
6874 wrfl();
6877 static int igb_set_vf_mac(struct igb_adapter *adapter,
6878 int vf, unsigned char *mac_addr)
6880 struct e1000_hw *hw = &adapter->hw;
6881 /* VF MAC addresses start at the end of the receive addresses and move
6882 * towards the first; as a result a collision should not be possible */
6883 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6885 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6887 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6889 return 0;
6892 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6894 struct igb_adapter *adapter = netdev_priv(netdev);
6895 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6896 return -EINVAL;
6897 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6898 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6899 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6900 " change effective.\n");
6901 if (test_bit(__IGB_DOWN, &adapter->state)) {
6902 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6903 " but the PF device is not up.\n");
6904 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6905 " attempting to use the VF device.\n");
6907 return igb_set_vf_mac(adapter, vf, mac);
6910 static int igb_link_mbps(int internal_link_speed)
6912 switch (internal_link_speed) {
6913 case SPEED_100:
6914 return 100;
6915 case SPEED_1000:
6916 return 1000;
6917 default:
6918 return 0;
6922 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6923 int link_speed)
6925 int rf_dec, rf_int;
6926 u32 bcnrc_val;
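/* RTTBCNRC takes link_speed / tx_rate as a fixed-point rate factor:
 * rf_int holds the integer part, rf_dec the fraction scaled by
 * 2^E1000_RTTBCNRC_RF_INT_SHIFT */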
6928 if (tx_rate != 0) {
6929 /* Calculate the rate factor values to set */
6930 rf_int = link_speed / tx_rate;
6931 rf_dec = (link_speed - (rf_int * tx_rate));
6932 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6934 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6935 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6936 E1000_RTTBCNRC_RF_INT_MASK);
6937 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6938 } else {
6939 bcnrc_val = 0;
6942 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6943 wr32(E1000_RTTBCNRC, bcnrc_val);
6946 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6948 int actual_link_speed, i;
6949 bool reset_rate = false;
6951 /* VF TX rate limit was not set or not supported */
6952 if ((adapter->vf_rate_link_speed == 0) ||
6953 (adapter->hw.mac.type != e1000_82576))
6954 return;
6956 actual_link_speed = igb_link_mbps(adapter->link_speed);
6957 if (actual_link_speed != adapter->vf_rate_link_speed) {
6958 reset_rate = true;
6959 adapter->vf_rate_link_speed = 0;
6960 dev_info(&adapter->pdev->dev,
6961 "Link speed has been changed. VF Transmit "
6962 "rate is disabled\n");
6965 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6966 if (reset_rate)
6967 adapter->vf_data[i].tx_rate = 0;
6969 igb_set_vf_rate_limit(&adapter->hw, i,
6970 adapter->vf_data[i].tx_rate,
6971 actual_link_speed);
6975 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6977 struct igb_adapter *adapter = netdev_priv(netdev);
6978 struct e1000_hw *hw = &adapter->hw;
6979 int actual_link_speed;
6981 if (hw->mac.type != e1000_82576)
6982 return -EOPNOTSUPP;
6984 actual_link_speed = igb_link_mbps(adapter->link_speed);
6985 if ((vf >= adapter->vfs_allocated_count) ||
6986 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6987 (tx_rate < 0) || (tx_rate > actual_link_speed))
6988 return -EINVAL;
6990 adapter->vf_rate_link_speed = actual_link_speed;
6991 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6992 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6994 return 0;
6997 static int igb_ndo_get_vf_config(struct net_device *netdev,
6998 int vf, struct ifla_vf_info *ivi)
7000 struct igb_adapter *adapter = netdev_priv(netdev);
7001 if (vf >= adapter->vfs_allocated_count)
7002 return -EINVAL;
7003 ivi->vf = vf;
7004 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7005 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7006 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7007 ivi->qos = adapter->vf_data[vf].pf_qos;
7008 return 0;
7011 static void igb_vmm_control(struct igb_adapter *adapter)
7013 struct e1000_hw *hw = &adapter->hw;
7014 u32 reg;
7016 switch (hw->mac.type) {
7017 case e1000_82575:
7018 default:
7019 /* replication is not supported for 82575 */
7020 return;
7021 case e1000_82576:
7022 /* notify HW that the MAC is adding vlan tags */
7023 reg = rd32(E1000_DTXCTL);
7024 reg |= E1000_DTXCTL_VLAN_ADDED;
7025 wr32(E1000_DTXCTL, reg);
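/* fall through - 82576 also needs the vlan strip setting below */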
7026 case e1000_82580:
7027 /* enable replication vlan tag stripping */
7028 reg = rd32(E1000_RPLOLR);
7029 reg |= E1000_RPLOLR_STRVLAN;
7030 wr32(E1000_RPLOLR, reg);
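/* fall through */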
7031 case e1000_i350:
7032 /* none of the above registers are supported by i350 */
7033 break;
7036 if (adapter->vfs_allocated_count) {
7037 igb_vmdq_set_loopback_pf(hw, true);
7038 igb_vmdq_set_replication_pf(hw, true);
7039 igb_vmdq_set_anti_spoofing_pf(hw, true,
7040 adapter->vfs_allocated_count);
7041 } else {
7042 igb_vmdq_set_loopback_pf(hw, false);
7043 igb_vmdq_set_replication_pf(hw, false);
7047 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7049 struct e1000_hw *hw = &adapter->hw;
7050 u32 dmac_thr;
7051 u16 hwm;
7053 if (hw->mac.type > e1000_82580) {
7054 if (adapter->flags & IGB_FLAG_DMAC) {
7055 u32 reg;
7057 /* force threshold to 0. */
7058 wr32(E1000_DMCTXTH, 0);
7061 * DMA Coalescing high water mark needs to be higher
7062 * than the RX threshold. set hwm to PBA - 2 * max
7063 * frame size
7065 hwm = pba - (2 * adapter->max_frame_size);
7066 reg = rd32(E1000_DMACR);
7067 reg &= ~E1000_DMACR_DMACTHR_MASK;
7068 dmac_thr = pba - 4;
7070 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7071 & E1000_DMACR_DMACTHR_MASK);
7073 /* transition to L0s or L1 if available */
7074 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7076 /* watchdog timer = ~1000 usec, in 32 usec intervals */
7077 reg |= (1000 >> 5);
7078 wr32(E1000_DMACR, reg);
7081 * no lower threshold to disable
7082 * coalescing (smart fifo) - UTRESH=0
7084 wr32(E1000_DMCRTRH, 0);
7085 wr32(E1000_FCRTC, hwm);
7087 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7089 wr32(E1000_DMCTLX, reg);
7092 * free space in the tx packet buffer required to
7093 * wake from DMA coalescing
7095 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7096 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7099 * make the low power state decision controlled
7100 * by DMA coalescing
7102 reg = rd32(E1000_PCIEMISC);
7103 reg &= ~E1000_PCIEMISC_LX_DECISION;
7104 wr32(E1000_PCIEMISC, reg);
7105 } /* endif adapter->dmac is not disabled */
7106 } else if (hw->mac.type == e1000_82580) {
7107 u32 reg = rd32(E1000_PCIEMISC);
7108 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7109 wr32(E1000_DMACR, 0);
7113 /* igb_main.c */