igb: Enable PF side of SR-IOV support for i350 devices
drivers/net/igb/igb_main.c
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION "2.1.0-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
        "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
                                                 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call = igb_notify_dca,
        .next          = NULL,
        .priority      = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
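/* Usage note (illustrative, not part of this patch): loading the PF driver
 * with e.g. "modprobe igb max_vfs=2" asks for two virtual functions per port;
 * the default of 0 leaves SR-IOV disabled. The VF mailbox/queue resources are
 * then set up later during probe, before the netdev is registered.
 */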
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static struct pci_driver igb_driver = {
        .name = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe = igb_probe,
        .remove = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Management Hooks */
        .suspend = igb_suspend,
        .resume = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
        u32 ofs;
        char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

        /* General Registers */
        {E1000_CTRL, "CTRL"},
        {E1000_STATUS, "STATUS"},
        {E1000_CTRL_EXT, "CTRL_EXT"},

        /* Interrupt Registers */
        {E1000_ICR, "ICR"},

        /* RX Registers */
        {E1000_RCTL, "RCTL"},
        {E1000_RDLEN(0), "RDLEN"},
        {E1000_RDH(0), "RDH"},
        {E1000_RDT(0), "RDT"},
        {E1000_RXDCTL(0), "RXDCTL"},
        {E1000_RDBAL(0), "RDBAL"},
        {E1000_RDBAH(0), "RDBAH"},

        /* TX Registers */
        {E1000_TCTL, "TCTL"},
        {E1000_TDBAL(0), "TDBAL"},
        {E1000_TDBAH(0), "TDBAH"},
        {E1000_TDLEN(0), "TDLEN"},
        {E1000_TDH(0), "TDH"},
        {E1000_TDT(0), "TDT"},
        {E1000_TXDCTL(0), "TXDCTL"},
        {E1000_TDFH, "TDFH"},
        {E1000_TDFT, "TDFT"},
        {E1000_TDFHS, "TDFHS"},
        {E1000_TDFPC, "TDFPC"},

        /* List Terminator */
        {}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
        int n = 0;
        char rname[16];
        u32 regs[8];

        switch (reginfo->ofs) {
        case E1000_RDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDLEN(n));
                break;
        case E1000_RDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDH(n));
                break;
        case E1000_RDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDT(n));
                break;
        case E1000_RXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RXDCTL(n));
                break;
        case E1000_RDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAL(n));
                break;
        case E1000_RDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_RDBAH(n));
                break;
        case E1000_TDBAL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAL(n));
                break;
        case E1000_TDBAH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDBAH(n));
                break;
        case E1000_TDLEN(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDLEN(n));
                break;
        case E1000_TDH(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDH(n));
                break;
        case E1000_TDT(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TDT(n));
                break;
        case E1000_TXDCTL(0):
                for (n = 0; n < 4; n++)
                        regs[n] = rd32(E1000_TXDCTL(n));
                break;
        default:
                printk(KERN_INFO "%-15s %08x\n",
                       reginfo->name, rd32(reginfo->ofs));
                return;
        }

        snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
        printk(KERN_INFO "%-15s ", rname);
        for (n = 0; n < 4; n++)
                printk(KERN_CONT "%08x ", regs[n]);
        printk(KERN_CONT "\n");
}
322 * igb_dump - Print registers, tx-rings and rx-rings
324 static void igb_dump(struct igb_adapter *adapter)
326 struct net_device *netdev = adapter->netdev;
327 struct e1000_hw *hw = &adapter->hw;
328 struct igb_reg_info *reginfo;
329 int n = 0;
330 struct igb_ring *tx_ring;
331 union e1000_adv_tx_desc *tx_desc;
332 struct my_u0 { u64 a; u64 b; } *u0;
333 struct igb_buffer *buffer_info;
334 struct igb_ring *rx_ring;
335 union e1000_adv_rx_desc *rx_desc;
336 u32 staterr;
337 int i = 0;
339 if (!netif_msg_hw(adapter))
340 return;
342 /* Print netdevice Info */
343 if (netdev) {
344 dev_info(&adapter->pdev->dev, "Net device Info\n");
345 printk(KERN_INFO "Device Name state "
346 "trans_start last_rx\n");
347 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
348 netdev->name,
349 netdev->state,
350 netdev->trans_start,
351 netdev->last_rx);
354 /* Print Registers */
355 dev_info(&adapter->pdev->dev, "Register Dump\n");
356 printk(KERN_INFO " Register Name Value\n");
357 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
358 reginfo->name; reginfo++) {
359 igb_regdump(hw, reginfo);
362 /* Print TX Ring Summary */
363 if (!netdev || !netif_running(netdev))
364 goto exit;
366 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
367 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
368 " leng ntw timestamp\n");
369 for (n = 0; n < adapter->num_tx_queues; n++) {
370 tx_ring = adapter->tx_ring[n];
371 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
372 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
373 n, tx_ring->next_to_use, tx_ring->next_to_clean,
374 (u64)buffer_info->dma,
375 buffer_info->length,
376 buffer_info->next_to_watch,
377 (u64)buffer_info->time_stamp);
380 /* Print TX Rings */
381 if (!netif_msg_tx_done(adapter))
382 goto rx_ring_summary;
384 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
386 /* Transmit Descriptor Formats
388 * Advanced Transmit Descriptor
389 * +--------------------------------------------------------------+
390 * 0 | Buffer Address [63:0] |
391 * +--------------------------------------------------------------+
392 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
393 * +--------------------------------------------------------------+
394 * 63 46 45 40 39 38 36 35 32 31 24 15 0
397 for (n = 0; n < adapter->num_tx_queues; n++) {
398 tx_ring = adapter->tx_ring[n];
399 printk(KERN_INFO "------------------------------------\n");
400 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
401 printk(KERN_INFO "------------------------------------\n");
402 printk(KERN_INFO "T [desc] [address 63:0 ] "
403 "[PlPOCIStDDM Ln] [bi->dma ] "
404 "leng ntw timestamp bi->skb\n");
406 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
407 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
408 buffer_info = &tx_ring->buffer_info[i];
409 u0 = (struct my_u0 *)tx_desc;
410 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
411 " %04X %3X %016llX %p", i,
412 le64_to_cpu(u0->a),
413 le64_to_cpu(u0->b),
414 (u64)buffer_info->dma,
415 buffer_info->length,
416 buffer_info->next_to_watch,
417 (u64)buffer_info->time_stamp,
418 buffer_info->skb);
419 if (i == tx_ring->next_to_use &&
420 i == tx_ring->next_to_clean)
421 printk(KERN_CONT " NTC/U\n");
422 else if (i == tx_ring->next_to_use)
423 printk(KERN_CONT " NTU\n");
424 else if (i == tx_ring->next_to_clean)
425 printk(KERN_CONT " NTC\n");
426 else
427 printk(KERN_CONT "\n");
429 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
430 print_hex_dump(KERN_INFO, "",
431 DUMP_PREFIX_ADDRESS,
432 16, 1, phys_to_virt(buffer_info->dma),
433 buffer_info->length, true);
437 /* Print RX Rings Summary */
438 rx_ring_summary:
439 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
440 printk(KERN_INFO "Queue [NTU] [NTC]\n");
441 for (n = 0; n < adapter->num_rx_queues; n++) {
442 rx_ring = adapter->rx_ring[n];
443 printk(KERN_INFO " %5d %5X %5X\n", n,
444 rx_ring->next_to_use, rx_ring->next_to_clean);
447 /* Print RX Rings */
448 if (!netif_msg_rx_status(adapter))
449 goto exit;
451 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
453 /* Advanced Receive Descriptor (Read) Format
454 * 63 1 0
455 * +-----------------------------------------------------+
456 * 0 | Packet Buffer Address [63:1] |A0/NSE|
457 * +----------------------------------------------+------+
458 * 8 | Header Buffer Address [63:1] | DD |
459 * +-----------------------------------------------------+
462 * Advanced Receive Descriptor (Write-Back) Format
464 * 63 48 47 32 31 30 21 20 17 16 4 3 0
465 * +------------------------------------------------------+
466 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
467 * | Checksum Ident | | | | Type | Type |
468 * +------------------------------------------------------+
469 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
470 * +------------------------------------------------------+
471 * 63 48 47 32 31 20 19 0
474 for (n = 0; n < adapter->num_rx_queues; n++) {
475 rx_ring = adapter->rx_ring[n];
476 printk(KERN_INFO "------------------------------------\n");
477 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
478 printk(KERN_INFO "------------------------------------\n");
479 printk(KERN_INFO "R [desc] [ PktBuf A0] "
480 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
481 "<-- Adv Rx Read format\n");
482 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
483 "[vl er S cks ln] ---------------- [bi->skb] "
484 "<-- Adv Rx Write-Back format\n");
486 for (i = 0; i < rx_ring->count; i++) {
487 buffer_info = &rx_ring->buffer_info[i];
488 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
489 u0 = (struct my_u0 *)rx_desc;
490 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
491 if (staterr & E1000_RXD_STAT_DD) {
492 /* Descriptor Done */
493 printk(KERN_INFO "RWB[0x%03X] %016llX "
494 "%016llX ---------------- %p", i,
495 le64_to_cpu(u0->a),
496 le64_to_cpu(u0->b),
497 buffer_info->skb);
498 } else {
499 printk(KERN_INFO "R [0x%03X] %016llX "
500 "%016llX %016llX %p", i,
501 le64_to_cpu(u0->a),
502 le64_to_cpu(u0->b),
503 (u64)buffer_info->dma,
504 buffer_info->skb);
506 if (netif_msg_pktdata(adapter)) {
507 print_hex_dump(KERN_INFO, "",
508 DUMP_PREFIX_ADDRESS,
509 16, 1,
510 phys_to_virt(buffer_info->dma),
511 rx_ring->rx_buffer_len, true);
512 if (rx_ring->rx_buffer_len
513 < IGB_RXBUFFER_1024)
514 print_hex_dump(KERN_INFO, "",
515 DUMP_PREFIX_ADDRESS,
516 16, 1,
517 phys_to_virt(
518 buffer_info->page_dma +
519 buffer_info->page_offset),
520 PAGE_SIZE/2, true);
524 if (i == rx_ring->next_to_use)
525 printk(KERN_CONT " NTU\n");
526 else if (i == rx_ring->next_to_clean)
527 printk(KERN_CONT " NTC\n");
528 else
529 printk(KERN_CONT "\n");
534 exit:
535 return;
540 * igb_read_clock - read raw cycle counter (to be used by time counter)
542 static cycle_t igb_read_clock(const struct cyclecounter *tc)
544 struct igb_adapter *adapter =
545 container_of(tc, struct igb_adapter, cycles);
546 struct e1000_hw *hw = &adapter->hw;
547 u64 stamp = 0;
548 int shift = 0;
551 * The timestamp latches on lowest register read. For the 82580
552 * the lowest register is SYSTIMR instead of SYSTIML. However we never
553 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
555 if (hw->mac.type == e1000_82580) {
556 stamp = rd32(E1000_SYSTIMR) >> 8;
557 shift = IGB_82580_TSYNC_SHIFT;
560 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
561 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
562 return stamp;
566 * igb_get_hw_dev - return device
567 * used by hardware layer to print debugging information
569 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
571 struct igb_adapter *adapter = hw->back;
572 return adapter->netdev;
576 * igb_init_module - Driver Registration Routine
578 * igb_init_module is the first routine called when the driver is
579 * loaded. All it does is register with the PCI subsystem.
581 static int __init igb_init_module(void)
583 int ret;
584 printk(KERN_INFO "%s - version %s\n",
585 igb_driver_string, igb_driver_version);
587 printk(KERN_INFO "%s\n", igb_copyright);
589 #ifdef CONFIG_IGB_DCA
590 dca_register_notify(&dca_notifier);
591 #endif
592 ret = pci_register_driver(&igb_driver);
593 return ret;
596 module_init(igb_init_module);
599 * igb_exit_module - Driver Exit Cleanup Routine
601 * igb_exit_module is called just before the driver is removed
602 * from memory.
604 static void __exit igb_exit_module(void)
606 #ifdef CONFIG_IGB_DCA
607 dca_unregister_notify(&dca_notifier);
608 #endif
609 pci_unregister_driver(&igb_driver);
612 module_exit(igb_exit_module);
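/* Q_IDX_82576(i) maps software ring i to a hardware queue index: even rings
 * land on queues 0-7 (i >> 1) and odd rings on queues 8-15 ((i & 1) << 3),
 * matching the 82576 VMDq layout described in igb_cache_ring_register() below.
 */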
614 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
616 * igb_cache_ring_register - Descriptor ring to register mapping
617 * @adapter: board private structure to initialize
619 * Once we know the feature-set enabled for the device, we'll cache
620 * the register offset the descriptor ring is assigned to.
622 static void igb_cache_ring_register(struct igb_adapter *adapter)
624 int i = 0, j = 0;
625 u32 rbase_offset = adapter->vfs_allocated_count;
627 switch (adapter->hw.mac.type) {
628 case e1000_82576:
629 /* The queues are allocated for virtualization such that VF 0
630 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
631 * In order to avoid collision we start at the first free queue
632 * and continue consuming queues in the same sequence
634 if (adapter->vfs_allocated_count) {
635 for (; i < adapter->rss_queues; i++)
636 adapter->rx_ring[i]->reg_idx = rbase_offset +
637 Q_IDX_82576(i);
639 case e1000_82575:
640 case e1000_82580:
641 case e1000_i350:
642 default:
643 for (; i < adapter->num_rx_queues; i++)
644 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
645 for (; j < adapter->num_tx_queues; j++)
646 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
647 break;
651 static void igb_free_queues(struct igb_adapter *adapter)
653 int i;
655 for (i = 0; i < adapter->num_tx_queues; i++) {
656 kfree(adapter->tx_ring[i]);
657 adapter->tx_ring[i] = NULL;
659 for (i = 0; i < adapter->num_rx_queues; i++) {
660 kfree(adapter->rx_ring[i]);
661 adapter->rx_ring[i] = NULL;
663 adapter->num_rx_queues = 0;
664 adapter->num_tx_queues = 0;
668 * igb_alloc_queues - Allocate memory for all rings
669 * @adapter: board private structure to initialize
671 * We allocate one ring per queue at run-time since we don't know the
672 * number of queues at compile-time.
674 static int igb_alloc_queues(struct igb_adapter *adapter)
676 struct igb_ring *ring;
677 int i;
679 for (i = 0; i < adapter->num_tx_queues; i++) {
680 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
681 if (!ring)
682 goto err;
683 ring->count = adapter->tx_ring_count;
684 ring->queue_index = i;
685 ring->dev = &adapter->pdev->dev;
686 ring->netdev = adapter->netdev;
687 /* For 82575, context index must be unique per ring. */
688 if (adapter->hw.mac.type == e1000_82575)
689 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
690 adapter->tx_ring[i] = ring;
693 for (i = 0; i < adapter->num_rx_queues; i++) {
694 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
695 if (!ring)
696 goto err;
697 ring->count = adapter->rx_ring_count;
698 ring->queue_index = i;
699 ring->dev = &adapter->pdev->dev;
700 ring->netdev = adapter->netdev;
701 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
702 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
703 /* set flag indicating ring supports SCTP checksum offload */
704 if (adapter->hw.mac.type >= e1000_82576)
705 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
706 adapter->rx_ring[i] = ring;
709 igb_cache_ring_register(adapter);
711 return 0;
713 err:
714 igb_free_queues(adapter);
716 return -ENOMEM;
719 #define IGB_N0_QUEUE -1
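/* IGB_N0_QUEUE (-1) is the sentinel reg_idx meaning "no RX/TX ring attached
 * to this q_vector"; real ring register indices are always >= 0.
 */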
720 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
722 u32 msixbm = 0;
723 struct igb_adapter *adapter = q_vector->adapter;
724 struct e1000_hw *hw = &adapter->hw;
725 u32 ivar, index;
726 int rx_queue = IGB_N0_QUEUE;
727 int tx_queue = IGB_N0_QUEUE;
729 if (q_vector->rx_ring)
730 rx_queue = q_vector->rx_ring->reg_idx;
731 if (q_vector->tx_ring)
732 tx_queue = q_vector->tx_ring->reg_idx;
734 switch (hw->mac.type) {
735 case e1000_82575:
736 /* The 82575 assigns vectors using a bitmask, which matches the
737 bitmask for the EICR/EIMS/EIMC registers. To assign one
738 or more queues to a vector, we write the appropriate bits
739 into the MSIXBM register for that vector. */
740 if (rx_queue > IGB_N0_QUEUE)
741 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
742 if (tx_queue > IGB_N0_QUEUE)
743 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
744 if (!adapter->msix_entries && msix_vector == 0)
745 msixbm |= E1000_EIMS_OTHER;
746 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
747 q_vector->eims_value = msixbm;
748 break;
749 case e1000_82576:
750 /* 82576 uses a table-based method for assigning vectors.
751 Each queue has a single entry in the table to which we write
752 a vector number along with a "valid" bit. Sadly, the layout
753 of the table is somewhat counterintuitive. */
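                /* Resulting IVAR0[n] byte layout on 82576 (each byte holds the
                 * MSI-X vector number plus E1000_IVAR_VALID):
                 *   byte 0 = RX queue n,     byte 1 = TX queue n,
                 *   byte 2 = RX queue n + 8, byte 3 = TX queue n + 8
                 */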
754 if (rx_queue > IGB_N0_QUEUE) {
755 index = (rx_queue & 0x7);
756 ivar = array_rd32(E1000_IVAR0, index);
757 if (rx_queue < 8) {
758 /* vector goes into low byte of register */
759 ivar = ivar & 0xFFFFFF00;
760 ivar |= msix_vector | E1000_IVAR_VALID;
761 } else {
762 /* vector goes into third byte of register */
763 ivar = ivar & 0xFF00FFFF;
764 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
766 array_wr32(E1000_IVAR0, index, ivar);
768 if (tx_queue > IGB_N0_QUEUE) {
769 index = (tx_queue & 0x7);
770 ivar = array_rd32(E1000_IVAR0, index);
771 if (tx_queue < 8) {
772 /* vector goes into second byte of register */
773 ivar = ivar & 0xFFFF00FF;
774 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
775 } else {
776 /* vector goes into high byte of register */
777 ivar = ivar & 0x00FFFFFF;
778 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
780 array_wr32(E1000_IVAR0, index, ivar);
782 q_vector->eims_value = 1 << msix_vector;
783 break;
784 case e1000_82580:
785 case e1000_i350:
786 /* 82580 uses the same table-based approach as 82576 but has fewer
787 entries as a result we carry over for queues greater than 4. */
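                /* On 82580/i350 the same IVAR0[n] entry instead covers a queue
                 * pair: byte 0 = RX queue 2n, byte 1 = TX queue 2n,
                 * byte 2 = RX queue 2n+1, byte 3 = TX queue 2n+1.
                 */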
788 if (rx_queue > IGB_N0_QUEUE) {
789 index = (rx_queue >> 1);
790 ivar = array_rd32(E1000_IVAR0, index);
791 if (rx_queue & 0x1) {
792 /* vector goes into third byte of register */
793 ivar = ivar & 0xFF00FFFF;
794 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
795 } else {
796 /* vector goes into low byte of register */
797 ivar = ivar & 0xFFFFFF00;
798 ivar |= msix_vector | E1000_IVAR_VALID;
800 array_wr32(E1000_IVAR0, index, ivar);
802 if (tx_queue > IGB_N0_QUEUE) {
803 index = (tx_queue >> 1);
804 ivar = array_rd32(E1000_IVAR0, index);
805 if (tx_queue & 0x1) {
806 /* vector goes into high byte of register */
807 ivar = ivar & 0x00FFFFFF;
808 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
809 } else {
810 /* vector goes into second byte of register */
811 ivar = ivar & 0xFFFF00FF;
812 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
814 array_wr32(E1000_IVAR0, index, ivar);
816 q_vector->eims_value = 1 << msix_vector;
817 break;
818 default:
819 BUG();
820 break;
823 /* add q_vector eims value to global eims_enable_mask */
824 adapter->eims_enable_mask |= q_vector->eims_value;
826 /* configure q_vector to set itr on first interrupt */
827 q_vector->set_itr = 1;
831 * igb_configure_msix - Configure MSI-X hardware
833 * igb_configure_msix sets up the hardware to properly
834 * generate MSI-X interrupts.
836 static void igb_configure_msix(struct igb_adapter *adapter)
838 u32 tmp;
839 int i, vector = 0;
840 struct e1000_hw *hw = &adapter->hw;
842 adapter->eims_enable_mask = 0;
844 /* set vector for other causes, i.e. link changes */
845 switch (hw->mac.type) {
846 case e1000_82575:
847 tmp = rd32(E1000_CTRL_EXT);
848 /* enable MSI-X PBA support*/
849 tmp |= E1000_CTRL_EXT_PBA_CLR;
851 /* Auto-Mask interrupts upon ICR read. */
852 tmp |= E1000_CTRL_EXT_EIAME;
853 tmp |= E1000_CTRL_EXT_IRCA;
855 wr32(E1000_CTRL_EXT, tmp);
857 /* enable msix_other interrupt */
858 array_wr32(E1000_MSIXBM(0), vector++,
859 E1000_EIMS_OTHER);
860 adapter->eims_other = E1000_EIMS_OTHER;
862 break;
864 case e1000_82576:
865 case e1000_82580:
866 case e1000_i350:
867 /* Turn on MSI-X capability first, or our settings
868 * won't stick. And it will take days to debug. */
869 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
870 E1000_GPIE_PBA | E1000_GPIE_EIAME |
871 E1000_GPIE_NSICR);
873 /* enable msix_other interrupt */
874 adapter->eims_other = 1 << vector;
875 tmp = (vector++ | E1000_IVAR_VALID) << 8;
877 wr32(E1000_IVAR_MISC, tmp);
878 break;
879 default:
880 /* do nothing, since nothing else supports MSI-X */
881 break;
882 } /* switch (hw->mac.type) */
884 adapter->eims_enable_mask |= adapter->eims_other;
886 for (i = 0; i < adapter->num_q_vectors; i++)
887 igb_assign_vector(adapter->q_vector[i], vector++);
889 wrfl();
893 * igb_request_msix - Initialize MSI-X interrupts
895 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
896 * kernel.
898 static int igb_request_msix(struct igb_adapter *adapter)
900 struct net_device *netdev = adapter->netdev;
901 struct e1000_hw *hw = &adapter->hw;
902 int i, err = 0, vector = 0;
904 err = request_irq(adapter->msix_entries[vector].vector,
905 igb_msix_other, 0, netdev->name, adapter);
906 if (err)
907 goto out;
908 vector++;
910 for (i = 0; i < adapter->num_q_vectors; i++) {
911 struct igb_q_vector *q_vector = adapter->q_vector[i];
913 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
915 if (q_vector->rx_ring && q_vector->tx_ring)
916 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
917 q_vector->rx_ring->queue_index);
918 else if (q_vector->tx_ring)
919 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
920 q_vector->tx_ring->queue_index);
921 else if (q_vector->rx_ring)
922 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
923 q_vector->rx_ring->queue_index);
924 else
925 sprintf(q_vector->name, "%s-unused", netdev->name);
927 err = request_irq(adapter->msix_entries[vector].vector,
928 igb_msix_ring, 0, q_vector->name,
929 q_vector);
930 if (err)
931 goto out;
932 vector++;
935 igb_configure_msix(adapter);
936 return 0;
937 out:
938 return err;
941 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
943 if (adapter->msix_entries) {
944 pci_disable_msix(adapter->pdev);
945 kfree(adapter->msix_entries);
946 adapter->msix_entries = NULL;
947 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
948 pci_disable_msi(adapter->pdev);
953 * igb_free_q_vectors - Free memory allocated for interrupt vectors
954 * @adapter: board private structure to initialize
956 * This function frees the memory allocated to the q_vectors. In addition if
957 * NAPI is enabled it will delete any references to the NAPI struct prior
958 * to freeing the q_vector.
960 static void igb_free_q_vectors(struct igb_adapter *adapter)
962 int v_idx;
964 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
965 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
966 adapter->q_vector[v_idx] = NULL;
967 if (!q_vector)
968 continue;
969 netif_napi_del(&q_vector->napi);
970 kfree(q_vector);
972 adapter->num_q_vectors = 0;
976 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
978 * This function resets the device so that it has 0 rx queues, tx queues, and
979 * MSI-X interrupts allocated.
981 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
983 igb_free_queues(adapter);
984 igb_free_q_vectors(adapter);
985 igb_reset_interrupt_capability(adapter);
989 * igb_set_interrupt_capability - set MSI or MSI-X if supported
991 * Attempt to configure interrupts using the best available
992 * capabilities of the hardware and kernel.
994 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
996 int err;
997 int numvecs, i;
999 /* Number of supported queues. */
1000 adapter->num_rx_queues = adapter->rss_queues;
1001 if (adapter->vfs_allocated_count)
1002 adapter->num_tx_queues = 1;
1003 else
1004 adapter->num_tx_queues = adapter->rss_queues;
1006 /* start with one vector for every rx queue */
1007 numvecs = adapter->num_rx_queues;
1009 /* if tx handler is separate add 1 for every tx queue */
1010 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1011 numvecs += adapter->num_tx_queues;
1013 /* store the number of vectors reserved for queues */
1014 adapter->num_q_vectors = numvecs;
1016 /* add 1 vector for link status interrupts */
1017 numvecs++;
1018 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1019 GFP_KERNEL);
1020 if (!adapter->msix_entries)
1021 goto msi_only;
1023 for (i = 0; i < numvecs; i++)
1024 adapter->msix_entries[i].entry = i;
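        /* Note: the legacy pci_enable_msix() returns 0 on success, a negative
         * errno on failure, or a positive count of vectors that could have
         * been allocated; this driver treats anything non-zero as "no MSI-X"
         * and falls back to single-vector MSI below.
         */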
1026 err = pci_enable_msix(adapter->pdev,
1027 adapter->msix_entries,
1028 numvecs);
1029 if (err == 0)
1030 goto out;
1032 igb_reset_interrupt_capability(adapter);
1034 /* If we can't do MSI-X, try MSI */
1035 msi_only:
1036 #ifdef CONFIG_PCI_IOV
1037 /* disable SR-IOV for non MSI-X configurations */
1038 if (adapter->vf_data) {
1039 struct e1000_hw *hw = &adapter->hw;
1040 /* disable iov and allow time for transactions to clear */
1041 pci_disable_sriov(adapter->pdev);
1042 msleep(500);
1044 kfree(adapter->vf_data);
1045 adapter->vf_data = NULL;
1046 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1047 msleep(100);
1048 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1050 #endif
1051 adapter->vfs_allocated_count = 0;
1052 adapter->rss_queues = 1;
1053 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1054 adapter->num_rx_queues = 1;
1055 adapter->num_tx_queues = 1;
1056 adapter->num_q_vectors = 1;
1057 if (!pci_enable_msi(adapter->pdev))
1058 adapter->flags |= IGB_FLAG_HAS_MSI;
1059 out:
1060 /* Notify the stack of the (possibly) reduced queue counts. */
1061 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1062 return netif_set_real_num_rx_queues(adapter->netdev,
1063 adapter->num_rx_queues);
1067 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1068 * @adapter: board private structure to initialize
1070 * We allocate one q_vector per queue interrupt. If allocation fails we
1071 * return -ENOMEM.
1073 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1075 struct igb_q_vector *q_vector;
1076 struct e1000_hw *hw = &adapter->hw;
1077 int v_idx;
1079 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1080 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1081 if (!q_vector)
1082 goto err_out;
1083 q_vector->adapter = adapter;
1084 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1085 q_vector->itr_val = IGB_START_ITR;
1086 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1087 adapter->q_vector[v_idx] = q_vector;
1089 return 0;
1091 err_out:
1092 igb_free_q_vectors(adapter);
1093 return -ENOMEM;
1096 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1097 int ring_idx, int v_idx)
1099 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1101 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1102 q_vector->rx_ring->q_vector = q_vector;
1103 q_vector->itr_val = adapter->rx_itr_setting;
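        /* rx_itr_setting values 1-3 are "dynamic ITR" mode flags rather than a
         * usable interrupt-throttle interval, so seed the vector with
         * IGB_START_ITR in that case (same logic for TX below).
         */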
1104 if (q_vector->itr_val && q_vector->itr_val <= 3)
1105 q_vector->itr_val = IGB_START_ITR;
1108 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1109 int ring_idx, int v_idx)
1111 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1113 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1114 q_vector->tx_ring->q_vector = q_vector;
1115 q_vector->itr_val = adapter->tx_itr_setting;
1116 if (q_vector->itr_val && q_vector->itr_val <= 3)
1117 q_vector->itr_val = IGB_START_ITR;
1121 * igb_map_ring_to_vector - maps allocated queues to vectors
1123 * This function maps the recently allocated queues to vectors.
1125 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1127 int i;
1128 int v_idx = 0;
1130 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1131 (adapter->num_q_vectors < adapter->num_tx_queues))
1132 return -ENOMEM;
1134 if (adapter->num_q_vectors >=
1135 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1136 for (i = 0; i < adapter->num_rx_queues; i++)
1137 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1138 for (i = 0; i < adapter->num_tx_queues; i++)
1139 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1140 } else {
1141 for (i = 0; i < adapter->num_rx_queues; i++) {
1142 if (i < adapter->num_tx_queues)
1143 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1144 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1146 for (; i < adapter->num_tx_queues; i++)
1147 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1149 return 0;
1153 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1155 * This function initializes the interrupts and allocates all of the queues.
1157 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1159 struct pci_dev *pdev = adapter->pdev;
1160 int err;
1162 err = igb_set_interrupt_capability(adapter);
1163 if (err)
1164 return err;
1166 err = igb_alloc_q_vectors(adapter);
1167 if (err) {
1168 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1169 goto err_alloc_q_vectors;
1172 err = igb_alloc_queues(adapter);
1173 if (err) {
1174 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1175 goto err_alloc_queues;
1178 err = igb_map_ring_to_vector(adapter);
1179 if (err) {
1180 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1181 goto err_map_queues;
1185 return 0;
1186 err_map_queues:
1187 igb_free_queues(adapter);
1188 err_alloc_queues:
1189 igb_free_q_vectors(adapter);
1190 err_alloc_q_vectors:
1191 igb_reset_interrupt_capability(adapter);
1192 return err;
1196 * igb_request_irq - initialize interrupts
1198 * Attempts to configure interrupts using the best available
1199 * capabilities of the hardware and kernel.
1201 static int igb_request_irq(struct igb_adapter *adapter)
1203 struct net_device *netdev = adapter->netdev;
1204 struct pci_dev *pdev = adapter->pdev;
1205 int err = 0;
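        /* Interrupt setup falls back in stages: MSI-X (one vector per q_vector
         * plus one for link/other causes), then single-vector MSI, and finally
         * legacy shared INTx.
         */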
1207 if (adapter->msix_entries) {
1208 err = igb_request_msix(adapter);
1209 if (!err)
1210 goto request_done;
1211 /* fall back to MSI */
1212 igb_clear_interrupt_scheme(adapter);
1213 if (!pci_enable_msi(adapter->pdev))
1214 adapter->flags |= IGB_FLAG_HAS_MSI;
1215 igb_free_all_tx_resources(adapter);
1216 igb_free_all_rx_resources(adapter);
1217 adapter->num_tx_queues = 1;
1218 adapter->num_rx_queues = 1;
1219 adapter->num_q_vectors = 1;
1220 err = igb_alloc_q_vectors(adapter);
1221 if (err) {
1222 dev_err(&pdev->dev,
1223 "Unable to allocate memory for vectors\n");
1224 goto request_done;
1226 err = igb_alloc_queues(adapter);
1227 if (err) {
1228 dev_err(&pdev->dev,
1229 "Unable to allocate memory for queues\n");
1230 igb_free_q_vectors(adapter);
1231 goto request_done;
1233 igb_setup_all_tx_resources(adapter);
1234 igb_setup_all_rx_resources(adapter);
1235 } else {
1236 igb_assign_vector(adapter->q_vector[0], 0);
1239 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1240 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1241 netdev->name, adapter);
1242 if (!err)
1243 goto request_done;
1245 /* fall back to legacy interrupts */
1246 igb_reset_interrupt_capability(adapter);
1247 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1250 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1251 netdev->name, adapter);
1253 if (err)
1254 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1255 err);
1257 request_done:
1258 return err;
1261 static void igb_free_irq(struct igb_adapter *adapter)
1263 if (adapter->msix_entries) {
1264 int vector = 0, i;
1266 free_irq(adapter->msix_entries[vector++].vector, adapter);
1268 for (i = 0; i < adapter->num_q_vectors; i++) {
1269 struct igb_q_vector *q_vector = adapter->q_vector[i];
1270 free_irq(adapter->msix_entries[vector++].vector,
1271 q_vector);
1273 } else {
1274 free_irq(adapter->pdev->irq, adapter);
1279 * igb_irq_disable - Mask off interrupt generation on the NIC
1280 * @adapter: board private structure
1282 static void igb_irq_disable(struct igb_adapter *adapter)
1284 struct e1000_hw *hw = &adapter->hw;
1287 * we need to be careful when disabling interrupts. The VFs are also
1288 * mapped into these registers and so clearing the bits can cause
1289 * issues on the VF drivers so we only need to clear what we set
1291 if (adapter->msix_entries) {
1292 u32 regval = rd32(E1000_EIAM);
1293 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1294 wr32(E1000_EIMC, adapter->eims_enable_mask);
1295 regval = rd32(E1000_EIAC);
1296 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1299 wr32(E1000_IAM, 0);
1300 wr32(E1000_IMC, ~0);
1301 wrfl();
1302 if (adapter->msix_entries) {
1303 int i;
1304 for (i = 0; i < adapter->num_q_vectors; i++)
1305 synchronize_irq(adapter->msix_entries[i].vector);
1306 } else {
1307 synchronize_irq(adapter->pdev->irq);
1312 * igb_irq_enable - Enable default interrupt generation settings
1313 * @adapter: board private structure
1315 static void igb_irq_enable(struct igb_adapter *adapter)
1317 struct e1000_hw *hw = &adapter->hw;
1319 if (adapter->msix_entries) {
1320 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1321 u32 regval = rd32(E1000_EIAC);
1322 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1323 regval = rd32(E1000_EIAM);
1324 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1325 wr32(E1000_EIMS, adapter->eims_enable_mask);
1326 if (adapter->vfs_allocated_count) {
1327 wr32(E1000_MBVFIMR, 0xFF);
1328 ims |= E1000_IMS_VMMB;
1330 if (adapter->hw.mac.type == e1000_82580)
1331 ims |= E1000_IMS_DRSTA;
1333 wr32(E1000_IMS, ims);
1334 } else {
1335 wr32(E1000_IMS, IMS_ENABLE_MASK |
1336 E1000_IMS_DRSTA);
1337 wr32(E1000_IAM, IMS_ENABLE_MASK |
1338 E1000_IMS_DRSTA);
1342 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1344 struct e1000_hw *hw = &adapter->hw;
1345 u16 vid = adapter->hw.mng_cookie.vlan_id;
1346 u16 old_vid = adapter->mng_vlan_id;
1348 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1349 /* add VID to filter table */
1350 igb_vfta_set(hw, vid, true);
1351 adapter->mng_vlan_id = vid;
1352 } else {
1353 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1356 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1357 (vid != old_vid) &&
1358 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1359 /* remove VID from filter table */
1360 igb_vfta_set(hw, old_vid, false);
1365 * igb_release_hw_control - release control of the h/w to f/w
1366 * @adapter: address of board private structure
1368 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1369 * For ASF and Pass Through versions of f/w this means that the
1370 * driver is no longer loaded.
1373 static void igb_release_hw_control(struct igb_adapter *adapter)
1375 struct e1000_hw *hw = &adapter->hw;
1376 u32 ctrl_ext;
1378 /* Let firmware take over control of h/w */
1379 ctrl_ext = rd32(E1000_CTRL_EXT);
1380 wr32(E1000_CTRL_EXT,
1381 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1385 * igb_get_hw_control - get control of the h/w from f/w
1386 * @adapter: address of board private structure
1388 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1389 * For ASF and Pass Through versions of f/w this means that
1390 * the driver is loaded.
1393 static void igb_get_hw_control(struct igb_adapter *adapter)
1395 struct e1000_hw *hw = &adapter->hw;
1396 u32 ctrl_ext;
1398 /* Let firmware know the driver has taken over */
1399 ctrl_ext = rd32(E1000_CTRL_EXT);
1400 wr32(E1000_CTRL_EXT,
1401 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1405 * igb_configure - configure the hardware for RX and TX
1406 * @adapter: private board structure
1408 static void igb_configure(struct igb_adapter *adapter)
1410 struct net_device *netdev = adapter->netdev;
1411 int i;
1413 igb_get_hw_control(adapter);
1414 igb_set_rx_mode(netdev);
1416 igb_restore_vlan(adapter);
1418 igb_setup_tctl(adapter);
1419 igb_setup_mrqc(adapter);
1420 igb_setup_rctl(adapter);
1422 igb_configure_tx(adapter);
1423 igb_configure_rx(adapter);
1425 igb_rx_fifo_flush_82575(&adapter->hw);
1427 /* call igb_desc_unused which always leaves
1428 * at least 1 descriptor unused to make sure
1429 * next_to_use != next_to_clean */
1430 for (i = 0; i < adapter->num_rx_queues; i++) {
1431 struct igb_ring *ring = adapter->rx_ring[i];
1432 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1437 * igb_power_up_link - Power up the phy/serdes link
1438 * @adapter: address of board private structure
1440 void igb_power_up_link(struct igb_adapter *adapter)
1442 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1443 igb_power_up_phy_copper(&adapter->hw);
1444 else
1445 igb_power_up_serdes_link_82575(&adapter->hw);
1449 * igb_power_down_link - Power down the phy/serdes link
1450 * @adapter: address of board private structure
1452 static void igb_power_down_link(struct igb_adapter *adapter)
1454 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1455 igb_power_down_phy_copper_82575(&adapter->hw);
1456 else
1457 igb_shutdown_serdes_link_82575(&adapter->hw);
1461 * igb_up - Open the interface and prepare it to handle traffic
1462 * @adapter: board private structure
1464 int igb_up(struct igb_adapter *adapter)
1466 struct e1000_hw *hw = &adapter->hw;
1467 int i;
1469 /* hardware has been reset, we need to reload some things */
1470 igb_configure(adapter);
1472 clear_bit(__IGB_DOWN, &adapter->state);
1474 for (i = 0; i < adapter->num_q_vectors; i++) {
1475 struct igb_q_vector *q_vector = adapter->q_vector[i];
1476 napi_enable(&q_vector->napi);
1478 if (adapter->msix_entries)
1479 igb_configure_msix(adapter);
1480 else
1481 igb_assign_vector(adapter->q_vector[0], 0);
1483 /* Clear any pending interrupts. */
1484 rd32(E1000_ICR);
1485 igb_irq_enable(adapter);
1487 /* notify VFs that reset has been completed */
1488 if (adapter->vfs_allocated_count) {
1489 u32 reg_data = rd32(E1000_CTRL_EXT);
1490 reg_data |= E1000_CTRL_EXT_PFRSTD;
1491 wr32(E1000_CTRL_EXT, reg_data);
1494 netif_tx_start_all_queues(adapter->netdev);
1496 /* start the watchdog. */
1497 hw->mac.get_link_status = 1;
1498 schedule_work(&adapter->watchdog_task);
1500 return 0;
1503 void igb_down(struct igb_adapter *adapter)
1505 struct net_device *netdev = adapter->netdev;
1506 struct e1000_hw *hw = &adapter->hw;
1507 u32 tctl, rctl;
1508 int i;
1510 /* signal that we're down so the interrupt handler does not
1511 * reschedule our watchdog timer */
1512 set_bit(__IGB_DOWN, &adapter->state);
1514 /* disable receives in the hardware */
1515 rctl = rd32(E1000_RCTL);
1516 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1517 /* flush and sleep below */
1519 netif_tx_stop_all_queues(netdev);
1521 /* disable transmits in the hardware */
1522 tctl = rd32(E1000_TCTL);
1523 tctl &= ~E1000_TCTL_EN;
1524 wr32(E1000_TCTL, tctl);
1525 /* flush both disables and wait for them to finish */
1526 wrfl();
1527 msleep(10);
1529 for (i = 0; i < adapter->num_q_vectors; i++) {
1530 struct igb_q_vector *q_vector = adapter->q_vector[i];
1531 napi_disable(&q_vector->napi);
1534 igb_irq_disable(adapter);
1536 del_timer_sync(&adapter->watchdog_timer);
1537 del_timer_sync(&adapter->phy_info_timer);
1539 netif_carrier_off(netdev);
1541 /* record the stats before reset*/
1542 spin_lock(&adapter->stats64_lock);
1543 igb_update_stats(adapter, &adapter->stats64);
1544 spin_unlock(&adapter->stats64_lock);
1546 adapter->link_speed = 0;
1547 adapter->link_duplex = 0;
1549 if (!pci_channel_offline(adapter->pdev))
1550 igb_reset(adapter);
1551 igb_clean_all_tx_rings(adapter);
1552 igb_clean_all_rx_rings(adapter);
1553 #ifdef CONFIG_IGB_DCA
1555 /* since we reset the hardware DCA settings were cleared */
1556 igb_setup_dca(adapter);
1557 #endif
1560 void igb_reinit_locked(struct igb_adapter *adapter)
1562 WARN_ON(in_interrupt());
1563 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1564 msleep(1);
1565 igb_down(adapter);
1566 igb_up(adapter);
1567 clear_bit(__IGB_RESETTING, &adapter->state);
1570 void igb_reset(struct igb_adapter *adapter)
1572 struct pci_dev *pdev = adapter->pdev;
1573 struct e1000_hw *hw = &adapter->hw;
1574 struct e1000_mac_info *mac = &hw->mac;
1575 struct e1000_fc_info *fc = &hw->fc;
1576 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1577 u16 hwm;
1579 /* Repartition Pba for greater than 9k mtu
1580 * To take effect CTRL.RST is required.
1582 switch (mac->type) {
1583 case e1000_i350:
1584 case e1000_82580:
1585 pba = rd32(E1000_RXPBS);
1586 pba = igb_rxpbs_adjust_82580(pba);
1587 break;
1588 case e1000_82576:
1589 pba = rd32(E1000_RXPBS);
1590 pba &= E1000_RXPBS_SIZE_MASK_82576;
1591 break;
1592 case e1000_82575:
1593 default:
1594 pba = E1000_PBA_34K;
1595 break;
1598 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1599 (mac->type < e1000_82576)) {
1600 /* adjust PBA for jumbo frames */
1601 wr32(E1000_PBA, pba);
1603 /* To maintain wire speed transmits, the Tx FIFO should be
1604 * large enough to accommodate two full transmit packets,
1605 * rounded up to the next 1KB and expressed in KB. Likewise,
1606 * the Rx FIFO should be large enough to accommodate at least
1607 * one full receive packet and is similarly rounded up and
1608 * expressed in KB. */
1609 pba = rd32(E1000_PBA);
1610 /* upper 16 bits has Tx packet buffer allocation size in KB */
1611 tx_space = pba >> 16;
1612 /* lower 16 bits has Rx packet buffer allocation size in KB */
1613 pba &= 0xffff;
1614 /* the tx fifo also stores 16 bytes of information about the tx
1615 * but don't include ethernet FCS because hardware appends it */
1616 min_tx_space = (adapter->max_frame_size +
1617 sizeof(union e1000_adv_tx_desc) -
1618 ETH_FCS_LEN) * 2;
1619 min_tx_space = ALIGN(min_tx_space, 1024);
1620 min_tx_space >>= 10;
1621 /* software strips receive CRC, so leave room for it */
1622 min_rx_space = adapter->max_frame_size;
1623 min_rx_space = ALIGN(min_rx_space, 1024);
1624 min_rx_space >>= 10;
1626 /* If current Tx allocation is less than the min Tx FIFO size,
1627 * and the min Tx FIFO size is less than the current Rx FIFO
1628 * allocation, take space away from current Rx allocation */
1629 if (tx_space < min_tx_space &&
1630 ((min_tx_space - tx_space) < pba)) {
1631 pba = pba - (min_tx_space - tx_space);
1633 /* if short on rx space, rx wins and must trump tx
1634 * adjustment */
1635 if (pba < min_rx_space)
1636 pba = min_rx_space;
1638 wr32(E1000_PBA, pba);
1641 /* flow control settings */
1642 /* The high water mark must be low enough to fit one full frame
1643 * (or the size used for early receive) above it in the Rx FIFO.
1644 * Set it to the lower of:
1645 * - 90% of the Rx FIFO size, or
1646 * - the full Rx FIFO size minus one full frame */
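        /* Illustrative arithmetic (not from the datasheet): with a 34 KB Rx
         * packet buffer and a 1522-byte max frame, hwm = min(34816 * 9 / 10,
         * 34816 - 2 * 1522) = min(31334, 31772) = 31334, which the 16-byte
         * alignment mask below rounds down to 31328.
         */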
1647 hwm = min(((pba << 10) * 9 / 10),
1648 ((pba << 10) - 2 * adapter->max_frame_size));
1650 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1651 fc->low_water = fc->high_water - 16;
1652 fc->pause_time = 0xFFFF;
1653 fc->send_xon = 1;
1654 fc->current_mode = fc->requested_mode;
1656 /* disable receive for all VFs and wait one second */
1657 if (adapter->vfs_allocated_count) {
1658 int i;
1659 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1660 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1662 /* ping all the active vfs to let them know we are going down */
1663 igb_ping_all_vfs(adapter);
1665 /* disable transmits and receives */
1666 wr32(E1000_VFRE, 0);
1667 wr32(E1000_VFTE, 0);
1670 /* Allow time for pending master requests to run */
1671 hw->mac.ops.reset_hw(hw);
1672 wr32(E1000_WUC, 0);
1674 if (hw->mac.ops.init_hw(hw))
1675 dev_err(&pdev->dev, "Hardware Error\n");
1677 if (hw->mac.type == e1000_82580) {
1678 u32 reg = rd32(E1000_PCIEMISC);
1679 wr32(E1000_PCIEMISC,
1680 reg & ~E1000_PCIEMISC_LX_DECISION);
1682 if (!netif_running(adapter->netdev))
1683 igb_power_down_link(adapter);
1685 igb_update_mng_vlan(adapter);
1687 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1688 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1690 igb_get_phy_info(hw);
1693 static const struct net_device_ops igb_netdev_ops = {
1694 .ndo_open = igb_open,
1695 .ndo_stop = igb_close,
1696 .ndo_start_xmit = igb_xmit_frame_adv,
1697 .ndo_get_stats64 = igb_get_stats64,
1698 .ndo_set_rx_mode = igb_set_rx_mode,
1699 .ndo_set_multicast_list = igb_set_rx_mode,
1700 .ndo_set_mac_address = igb_set_mac,
1701 .ndo_change_mtu = igb_change_mtu,
1702 .ndo_do_ioctl = igb_ioctl,
1703 .ndo_tx_timeout = igb_tx_timeout,
1704 .ndo_validate_addr = eth_validate_addr,
1705 .ndo_vlan_rx_register = igb_vlan_rx_register,
1706 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1707 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1708 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1709 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1710 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1711 .ndo_get_vf_config = igb_ndo_get_vf_config,
1712 #ifdef CONFIG_NET_POLL_CONTROLLER
1713 .ndo_poll_controller = igb_netpoll,
1714 #endif
1718 * igb_probe - Device Initialization Routine
1719 * @pdev: PCI device information struct
1720 * @ent: entry in igb_pci_tbl
1722 * Returns 0 on success, negative on failure
1724 * igb_probe initializes an adapter identified by a pci_dev structure.
1725 * The OS initialization, configuring of the adapter private structure,
1726 * and a hardware reset occur.
1728 static int __devinit igb_probe(struct pci_dev *pdev,
1729 const struct pci_device_id *ent)
1731 struct net_device *netdev;
1732 struct igb_adapter *adapter;
1733 struct e1000_hw *hw;
1734 u16 eeprom_data = 0;
1735 s32 ret_val;
1736 static int global_quad_port_a; /* global quad port a indication */
1737 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1738 unsigned long mmio_start, mmio_len;
1739 int err, pci_using_dac;
1740 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1741 u8 part_str[E1000_PBANUM_LENGTH];
1743 /* Catch broken hardware that put the wrong VF device ID in
1744 * the PCIe SR-IOV capability.
1746 if (pdev->is_virtfn) {
1747 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1748 pci_name(pdev), pdev->vendor, pdev->device);
1749 return -EINVAL;
1752 err = pci_enable_device_mem(pdev);
1753 if (err)
1754 return err;
1756 pci_using_dac = 0;
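        /* Prefer a 64-bit DMA mask; when it succeeds, pci_using_dac lets the
         * driver advertise NETIF_F_HIGHDMA further down. Otherwise fall back
         * to 32-bit streaming and coherent masks.
         */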
1757 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1758 if (!err) {
1759 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1760 if (!err)
1761 pci_using_dac = 1;
1762 } else {
1763 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1764 if (err) {
1765 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1766 if (err) {
1767 dev_err(&pdev->dev, "No usable DMA "
1768 "configuration, aborting\n");
1769 goto err_dma;
1774 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1775 IORESOURCE_MEM),
1776 igb_driver_name);
1777 if (err)
1778 goto err_pci_reg;
1780 pci_enable_pcie_error_reporting(pdev);
1782 pci_set_master(pdev);
1783 pci_save_state(pdev);
1785 err = -ENOMEM;
1786 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1787 IGB_ABS_MAX_TX_QUEUES);
1788 if (!netdev)
1789 goto err_alloc_etherdev;
1791 SET_NETDEV_DEV(netdev, &pdev->dev);
1793 pci_set_drvdata(pdev, netdev);
1794 adapter = netdev_priv(netdev);
1795 adapter->netdev = netdev;
1796 adapter->pdev = pdev;
1797 hw = &adapter->hw;
1798 hw->back = adapter;
1799 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1801 mmio_start = pci_resource_start(pdev, 0);
1802 mmio_len = pci_resource_len(pdev, 0);
1804 err = -EIO;
1805 hw->hw_addr = ioremap(mmio_start, mmio_len);
1806 if (!hw->hw_addr)
1807 goto err_ioremap;
1809 netdev->netdev_ops = &igb_netdev_ops;
1810 igb_set_ethtool_ops(netdev);
1811 netdev->watchdog_timeo = 5 * HZ;
1813 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1815 netdev->mem_start = mmio_start;
1816 netdev->mem_end = mmio_start + mmio_len;
1818 /* PCI config space info */
1819 hw->vendor_id = pdev->vendor;
1820 hw->device_id = pdev->device;
1821 hw->revision_id = pdev->revision;
1822 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1823 hw->subsystem_device_id = pdev->subsystem_device;
1825 /* Copy the default MAC, PHY and NVM function pointers */
1826 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1827 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1828 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1829 /* Initialize skew-specific constants */
1830 err = ei->get_invariants(hw);
1831 if (err)
1832 goto err_sw_init;
1834 /* setup the private structure */
1835 err = igb_sw_init(adapter);
1836 if (err)
1837 goto err_sw_init;
1839 igb_get_bus_info_pcie(hw);
1841 hw->phy.autoneg_wait_to_complete = false;
1843 /* Copper options */
1844 if (hw->phy.media_type == e1000_media_type_copper) {
1845 hw->phy.mdix = AUTO_ALL_MODES;
1846 hw->phy.disable_polarity_correction = false;
1847 hw->phy.ms_type = e1000_ms_hw_default;
1850 if (igb_check_reset_block(hw))
1851 dev_info(&pdev->dev,
1852 "PHY reset is blocked due to SOL/IDER session.\n");
1854 netdev->features = NETIF_F_SG |
1855 NETIF_F_IP_CSUM |
1856 NETIF_F_HW_VLAN_TX |
1857 NETIF_F_HW_VLAN_RX |
1858 NETIF_F_HW_VLAN_FILTER;
1860 netdev->features |= NETIF_F_IPV6_CSUM;
1861 netdev->features |= NETIF_F_TSO;
1862 netdev->features |= NETIF_F_TSO6;
1863 netdev->features |= NETIF_F_GRO;
1865 netdev->vlan_features |= NETIF_F_TSO;
1866 netdev->vlan_features |= NETIF_F_TSO6;
1867 netdev->vlan_features |= NETIF_F_IP_CSUM;
1868 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1869 netdev->vlan_features |= NETIF_F_SG;
1871 if (pci_using_dac) {
1872 netdev->features |= NETIF_F_HIGHDMA;
1873 netdev->vlan_features |= NETIF_F_HIGHDMA;
1876 if (hw->mac.type >= e1000_82576)
1877 netdev->features |= NETIF_F_SCTP_CSUM;
1879 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1881 /* before reading the NVM, reset the controller to put the device in a
1882 * known good starting state */
1883 hw->mac.ops.reset_hw(hw);
1885 /* make sure the NVM is good */
1886 if (igb_validate_nvm_checksum(hw) < 0) {
1887 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1888 err = -EIO;
1889 goto err_eeprom;
1892 /* copy the MAC address out of the NVM */
1893 if (hw->mac.ops.read_mac_addr(hw))
1894 dev_err(&pdev->dev, "NVM Read Error\n");
1896 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1897 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1899 if (!is_valid_ether_addr(netdev->perm_addr)) {
1900 dev_err(&pdev->dev, "Invalid MAC Address\n");
1901 err = -EIO;
1902 goto err_eeprom;
1905 setup_timer(&adapter->watchdog_timer, igb_watchdog,
1906 (unsigned long) adapter);
1907 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1908 (unsigned long) adapter);
1910 INIT_WORK(&adapter->reset_task, igb_reset_task);
1911 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1913 /* Initialize link properties that are user-changeable */
1914 adapter->fc_autoneg = true;
1915 hw->mac.autoneg = true;
1916 hw->phy.autoneg_advertised = 0x2f;
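/*
 * 0x2f is assumed here to follow the e1000 ADVERTISE_* bit layout, i.e.
 * 10 half/full, 100 half/full and 1000 full duplex are all advertised,
 * while 1000 half (bit 4) is intentionally left out.
 */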
1918 hw->fc.requested_mode = e1000_fc_default;
1919 hw->fc.current_mode = e1000_fc_default;
1921 igb_validate_mdi_setting(hw);
1923 /* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
1924 * enable the ACPI Magic Packet filter
1927 if (hw->bus.func == 0)
1928 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1929 else if (hw->mac.type == e1000_82580)
1930 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1931 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1932 &eeprom_data);
1933 else if (hw->bus.func == 1)
1934 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1936 if (eeprom_data & eeprom_apme_mask)
1937 adapter->eeprom_wol |= E1000_WUFC_MAG;
1939 /* now that we have the eeprom settings, apply the special cases where
1940 * the eeprom may be wrong or the board simply won't support wake on
1941 * lan on a particular port */
1942 switch (pdev->device) {
1943 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1944 adapter->eeprom_wol = 0;
1945 break;
1946 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1947 case E1000_DEV_ID_82576_FIBER:
1948 case E1000_DEV_ID_82576_SERDES:
1949 /* Wake events only supported on port A for dual fiber
1950 * regardless of eeprom setting */
1951 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1952 adapter->eeprom_wol = 0;
1953 break;
1954 case E1000_DEV_ID_82576_QUAD_COPPER:
1955 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1956 /* if quad port adapter, disable WoL on all but port A */
1957 if (global_quad_port_a != 0)
1958 adapter->eeprom_wol = 0;
1959 else
1960 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1961 /* Reset for multiple quad port adapters */
1962 if (++global_quad_port_a == 4)
1963 global_quad_port_a = 0;
1964 break;
1967 /* initialize the wol settings based on the eeprom settings */
1968 adapter->wol = adapter->eeprom_wol;
1969 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1971 /* reset the hardware with the new settings */
1972 igb_reset(adapter);
1974 /* let the f/w know that the h/w is now under the control of the
1975 * driver. */
1976 igb_get_hw_control(adapter);
1978 strcpy(netdev->name, "eth%d");
1979 err = register_netdev(netdev);
1980 if (err)
1981 goto err_register;
1983 /* carrier off reporting is important to ethtool even BEFORE open */
1984 netif_carrier_off(netdev);
1986 #ifdef CONFIG_IGB_DCA
1987 if (dca_add_requester(&pdev->dev) == 0) {
1988 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1989 dev_info(&pdev->dev, "DCA enabled\n");
1990 igb_setup_dca(adapter);
1993 #endif
1994 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1995 /* print bus type/speed/width info */
1996 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1997 netdev->name,
1998 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1999 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2000 "unknown"),
2001 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2002 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2003 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2004 "unknown"),
2005 netdev->dev_addr);
2007 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2008 if (ret_val)
2009 strcpy(part_str, "Unknown");
2010 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2011 dev_info(&pdev->dev,
2012 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2013 adapter->msix_entries ? "MSI-X" :
2014 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2015 adapter->num_rx_queues, adapter->num_tx_queues);
2017 return 0;
2019 err_register:
2020 igb_release_hw_control(adapter);
2021 err_eeprom:
2022 if (!igb_check_reset_block(hw))
2023 igb_reset_phy(hw);
2025 if (hw->flash_address)
2026 iounmap(hw->flash_address);
2027 err_sw_init:
2028 igb_clear_interrupt_scheme(adapter);
2029 iounmap(hw->hw_addr);
2030 err_ioremap:
2031 free_netdev(netdev);
2032 err_alloc_etherdev:
2033 pci_release_selected_regions(pdev,
2034 pci_select_bars(pdev, IORESOURCE_MEM));
2035 err_pci_reg:
2036 err_dma:
2037 pci_disable_device(pdev);
2038 return err;
2042 * igb_remove - Device Removal Routine
2043 * @pdev: PCI device information struct
2045 * igb_remove is called by the PCI subsystem to alert the driver
2046 * that it should release a PCI device. This could be caused by a
2047 * Hot-Plug event, or because the driver is going to be removed from
2048 * memory.
2050 static void __devexit igb_remove(struct pci_dev *pdev)
2052 struct net_device *netdev = pci_get_drvdata(pdev);
2053 struct igb_adapter *adapter = netdev_priv(netdev);
2054 struct e1000_hw *hw = &adapter->hw;
2057 * The watchdog timer may be rescheduled, so explicitly
2058 * prevent the watchdog from being rescheduled.
2060 set_bit(__IGB_DOWN, &adapter->state);
2061 del_timer_sync(&adapter->watchdog_timer);
2062 del_timer_sync(&adapter->phy_info_timer);
2064 cancel_work_sync(&adapter->reset_task);
2065 cancel_work_sync(&adapter->watchdog_task);
2067 #ifdef CONFIG_IGB_DCA
2068 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2069 dev_info(&pdev->dev, "DCA disabled\n");
2070 dca_remove_requester(&pdev->dev);
2071 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2072 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2074 #endif
2076 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2077 * would have already happened in close and is redundant. */
2078 igb_release_hw_control(adapter);
2080 unregister_netdev(netdev);
2082 igb_clear_interrupt_scheme(adapter);
2084 #ifdef CONFIG_PCI_IOV
2085 /* reclaim resources allocated to VFs */
2086 if (adapter->vf_data) {
2087 /* disable iov and allow time for transactions to clear */
2088 pci_disable_sriov(pdev);
2089 msleep(500);
2091 kfree(adapter->vf_data);
2092 adapter->vf_data = NULL;
2093 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2094 msleep(100);
2095 dev_info(&pdev->dev, "IOV Disabled\n");
2097 #endif
2099 iounmap(hw->hw_addr);
2100 if (hw->flash_address)
2101 iounmap(hw->flash_address);
2102 pci_release_selected_regions(pdev,
2103 pci_select_bars(pdev, IORESOURCE_MEM));
2105 free_netdev(netdev);
2107 pci_disable_pcie_error_reporting(pdev);
2109 pci_disable_device(pdev);
2113 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2114 * @adapter: board private structure to initialize
2116 * This function initializes the vf specific data storage and then attempts to
2117 * allocate the VFs. The reason for ordering it this way is because it is much
2118 * allocate the VFs. The reason for this ordering is that it is much
2119 * more expensive time-wise to disable SR-IOV than it is to allocate and free
2121 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2123 #ifdef CONFIG_PCI_IOV
2124 struct pci_dev *pdev = adapter->pdev;
2126 if (adapter->vfs_allocated_count) {
2127 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2128 sizeof(struct vf_data_storage),
2129 GFP_KERNEL);
2130 /* if allocation failed then we do not support SR-IOV */
2131 if (!adapter->vf_data) {
2132 adapter->vfs_allocated_count = 0;
2133 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2134 "Data Storage\n");
2138 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2139 kfree(adapter->vf_data);
2140 adapter->vf_data = NULL;
2141 #endif /* CONFIG_PCI_IOV */
2142 adapter->vfs_allocated_count = 0;
2143 #ifdef CONFIG_PCI_IOV
2144 } else {
2145 unsigned char mac_addr[ETH_ALEN];
2146 int i;
2147 dev_info(&pdev->dev, "%d vfs allocated\n",
2148 adapter->vfs_allocated_count);
2149 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2150 random_ether_addr(mac_addr);
2151 igb_set_vf_mac(adapter, i, mac_addr);
2154 #endif /* CONFIG_PCI_IOV */
2159 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2160 * @adapter: board private structure to initialize
2162 * igb_init_hw_timer initializes the function pointer and values for the hw
2163 * timer found in hardware.
2165 static void igb_init_hw_timer(struct igb_adapter *adapter)
2167 struct e1000_hw *hw = &adapter->hw;
2169 switch (hw->mac.type) {
2170 case e1000_i350:
2171 case e1000_82580:
2172 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2173 adapter->cycles.read = igb_read_clock;
2174 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2175 adapter->cycles.mult = 1;
2177 * The 82580 timesync advances the system timer by 8ns every 8ns
2178 * and the value cannot be shifted. Instead we need to shift
2179 * the registers to generate a 64bit timer value. As a result
2180 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2181 * 24 in order to generate a larger value for synchronization.
2183 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
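/*
 * Illustrative check of the arithmetic above, assuming IGB_82580_TSYNC_SHIFT
 * is the 24 mentioned in the comment: with cycles.mult = 1 the timecounter
 * converts a raw reading to nanoseconds as ns = (raw * mult) >> shift =
 * raw >> 24, so a SYSTIM value that was shifted left by 24 when read comes
 * back out as plain nanoseconds.
 */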
2184 /* disable system timer temporarily by setting bit 31 */
2185 wr32(E1000_TSAUXC, 0x80000000);
2186 wrfl();
2188 /* Set registers so that rollover occurs soon to test this. */
2189 wr32(E1000_SYSTIMR, 0x00000000);
2190 wr32(E1000_SYSTIML, 0x80000000);
2191 wr32(E1000_SYSTIMH, 0x000000FF);
2192 wrfl();
2194 /* enable system timer by clearing bit 31 */
2195 wr32(E1000_TSAUXC, 0x0);
2196 wrfl();
2198 timecounter_init(&adapter->clock,
2199 &adapter->cycles,
2200 ktime_to_ns(ktime_get_real()));
2202 * Synchronize our NIC clock against the system wall clock. NIC
2203 * time stamp reading requires ~3us per sample, and each sample
2204 * was pretty stable even under load, so only 10 samples are
2205 * required for each offset comparison.
2207 memset(&adapter->compare, 0, sizeof(adapter->compare));
2208 adapter->compare.source = &adapter->clock;
2209 adapter->compare.target = ktime_get_real;
2210 adapter->compare.num_samples = 10;
2211 timecompare_update(&adapter->compare, 0);
2212 break;
2213 case e1000_82576:
2215 * Initialize hardware timer: we keep it running just in case
2216 * some program needs it later on.
2218 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2219 adapter->cycles.read = igb_read_clock;
2220 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2221 adapter->cycles.mult = 1;
2223 * Scale the NIC clock cycle by a large factor so that
2224 * relatively small clock corrections can be added or
2225 * subtracted at each clock tick. The drawbacks of a large
2226 * factor are a) that the clock register overflows more quickly
2227 * (not such a big deal) and b) that the increment per tick has
2228 * to fit into 24 bits. As a result we need to use a shift of
2229 * 19 so we can fit a value of 16 into the TIMINCA register.
2231 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2232 wr32(E1000_TIMINCA,
2233 (1 << E1000_TIMINCA_16NS_SHIFT) |
2234 (16 << IGB_82576_TSYNC_SHIFT));
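/*
 * Rough sanity check of the scaling above (assuming the 16NS field of
 * TIMINCA selects a 16 ns update period, as the name suggests): every 16 ns
 * the counter grows by 16 << 19, i.e. 1 << 19 counter units per nanosecond,
 * so with mult = 1 and shift = 19 the timecounter's raw >> 19 conversion
 * recovers nanoseconds exactly.
 */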
2236 /* Set registers so that rollover occurs soon to test this. */
2237 wr32(E1000_SYSTIML, 0x00000000);
2238 wr32(E1000_SYSTIMH, 0xFF800000);
2239 wrfl();
2241 timecounter_init(&adapter->clock,
2242 &adapter->cycles,
2243 ktime_to_ns(ktime_get_real()));
2245 * Synchronize our NIC clock against the system wall clock. NIC
2246 * time stamp reading requires ~3us per sample, and each sample
2247 * was pretty stable even under load, so only 10 samples are
2248 * required for each offset comparison.
2250 memset(&adapter->compare, 0, sizeof(adapter->compare));
2251 adapter->compare.source = &adapter->clock;
2252 adapter->compare.target = ktime_get_real;
2253 adapter->compare.num_samples = 10;
2254 timecompare_update(&adapter->compare, 0);
2255 break;
2256 case e1000_82575:
2257 /* 82575 does not support timesync */
2258 default:
2259 break;
2265 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2266 * @adapter: board private structure to initialize
2268 * igb_sw_init initializes the Adapter private data structure.
2269 * Fields are initialized based on PCI device information and
2270 * OS network device settings (MTU size).
2272 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2274 struct e1000_hw *hw = &adapter->hw;
2275 struct net_device *netdev = adapter->netdev;
2276 struct pci_dev *pdev = adapter->pdev;
2278 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2280 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2281 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2282 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2283 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2285 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2286 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2288 spin_lock_init(&adapter->stats64_lock);
2289 #ifdef CONFIG_PCI_IOV
2290 switch (hw->mac.type) {
2291 case e1000_82576:
2292 case e1000_i350:
2293 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2294 break;
2295 default:
2296 break;
2298 #endif /* CONFIG_PCI_IOV */
2299 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2302 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2303 * then we should combine the queues into a queue pair in order to
2304 * conserve interrupts due to limited supply
2306 if ((adapter->rss_queues > 4) ||
2307 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2308 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2310 /* This call may decrease the number of queues */
2311 if (igb_init_interrupt_scheme(adapter)) {
2312 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2313 return -ENOMEM;
2316 igb_init_hw_timer(adapter);
2317 igb_probe_vfs(adapter);
2319 /* Explicitly disable IRQ since the NIC can be in any state. */
2320 igb_irq_disable(adapter);
2322 set_bit(__IGB_DOWN, &adapter->state);
2323 return 0;
2327 * igb_open - Called when a network interface is made active
2328 * @netdev: network interface device structure
2330 * Returns 0 on success, negative value on failure
2332 * The open entry point is called when a network interface is made
2333 * active by the system (IFF_UP). At this point all resources needed
2334 * for transmit and receive operations are allocated, the interrupt
2335 * handler is registered with the OS, the watchdog timer is started,
2336 * and the stack is notified that the interface is ready.
2338 static int igb_open(struct net_device *netdev)
2340 struct igb_adapter *adapter = netdev_priv(netdev);
2341 struct e1000_hw *hw = &adapter->hw;
2342 int err;
2343 int i;
2345 /* disallow open during test */
2346 if (test_bit(__IGB_TESTING, &adapter->state))
2347 return -EBUSY;
2349 netif_carrier_off(netdev);
2351 /* allocate transmit descriptors */
2352 err = igb_setup_all_tx_resources(adapter);
2353 if (err)
2354 goto err_setup_tx;
2356 /* allocate receive descriptors */
2357 err = igb_setup_all_rx_resources(adapter);
2358 if (err)
2359 goto err_setup_rx;
2361 igb_power_up_link(adapter);
2363 /* before we allocate an interrupt, we must be ready to handle it.
2364 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2365 * as soon as we call pci_request_irq, so we have to setup our
2366 * clean_rx handler before we do so. */
2367 igb_configure(adapter);
2369 err = igb_request_irq(adapter);
2370 if (err)
2371 goto err_req_irq;
2373 /* From here on the code is the same as igb_up() */
2374 clear_bit(__IGB_DOWN, &adapter->state);
2376 for (i = 0; i < adapter->num_q_vectors; i++) {
2377 struct igb_q_vector *q_vector = adapter->q_vector[i];
2378 napi_enable(&q_vector->napi);
2381 /* Clear any pending interrupts. */
2382 rd32(E1000_ICR);
2384 igb_irq_enable(adapter);
2386 /* notify VFs that reset has been completed */
2387 if (adapter->vfs_allocated_count) {
2388 u32 reg_data = rd32(E1000_CTRL_EXT);
2389 reg_data |= E1000_CTRL_EXT_PFRSTD;
2390 wr32(E1000_CTRL_EXT, reg_data);
2393 netif_tx_start_all_queues(netdev);
2395 /* start the watchdog. */
2396 hw->mac.get_link_status = 1;
2397 schedule_work(&adapter->watchdog_task);
2399 return 0;
2401 err_req_irq:
2402 igb_release_hw_control(adapter);
2403 igb_power_down_link(adapter);
2404 igb_free_all_rx_resources(adapter);
2405 err_setup_rx:
2406 igb_free_all_tx_resources(adapter);
2407 err_setup_tx:
2408 igb_reset(adapter);
2410 return err;
2414 * igb_close - Disables a network interface
2415 * @netdev: network interface device structure
2417 * Returns 0, this is not allowed to fail
2419 * The close entry point is called when an interface is de-activated
2420 * by the OS. The hardware is still under the driver's control, but
2421 * needs to be disabled. A global MAC reset is issued to stop the
2422 * hardware, and all transmit and receive resources are freed.
2424 static int igb_close(struct net_device *netdev)
2426 struct igb_adapter *adapter = netdev_priv(netdev);
2428 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2429 igb_down(adapter);
2431 igb_free_irq(adapter);
2433 igb_free_all_tx_resources(adapter);
2434 igb_free_all_rx_resources(adapter);
2436 return 0;
2440 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2441 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2443 * Return 0 on success, negative on failure
2445 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2447 struct device *dev = tx_ring->dev;
2448 int size;
2450 size = sizeof(struct igb_buffer) * tx_ring->count;
2451 tx_ring->buffer_info = vzalloc(size);
2452 if (!tx_ring->buffer_info)
2453 goto err;
2455 /* round up to nearest 4K */
2456 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2457 tx_ring->size = ALIGN(tx_ring->size, 4096);
2459 tx_ring->desc = dma_alloc_coherent(dev,
2460 tx_ring->size,
2461 &tx_ring->dma,
2462 GFP_KERNEL);
2464 if (!tx_ring->desc)
2465 goto err;
2467 tx_ring->next_to_use = 0;
2468 tx_ring->next_to_clean = 0;
2469 return 0;
2471 err:
2472 vfree(tx_ring->buffer_info);
2473 dev_err(dev,
2474 "Unable to allocate memory for the transmit descriptor ring\n");
2475 return -ENOMEM;
2479 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2480 * (Descriptors) for all queues
2481 * @adapter: board private structure
2483 * Return 0 on success, negative on failure
2485 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2487 struct pci_dev *pdev = adapter->pdev;
2488 int i, err = 0;
2490 for (i = 0; i < adapter->num_tx_queues; i++) {
2491 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2492 if (err) {
2493 dev_err(&pdev->dev,
2494 "Allocation for Tx Queue %u failed\n", i);
2495 for (i--; i >= 0; i--)
2496 igb_free_tx_resources(adapter->tx_ring[i]);
2497 break;
2501 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2502 int r_idx = i % adapter->num_tx_queues;
2503 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2505 return err;
2509 * igb_setup_tctl - configure the transmit control registers
2510 * @adapter: Board private structure
2512 void igb_setup_tctl(struct igb_adapter *adapter)
2514 struct e1000_hw *hw = &adapter->hw;
2515 u32 tctl;
2517 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2518 wr32(E1000_TXDCTL(0), 0);
2520 /* Program the Transmit Control Register */
2521 tctl = rd32(E1000_TCTL);
2522 tctl &= ~E1000_TCTL_CT;
2523 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2524 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2526 igb_config_collision_dist(hw);
2528 /* Enable transmits */
2529 tctl |= E1000_TCTL_EN;
2531 wr32(E1000_TCTL, tctl);
2535 * igb_configure_tx_ring - Configure transmit ring after Reset
2536 * @adapter: board private structure
2537 * @ring: tx ring to configure
2539 * Configure a transmit ring after a reset.
2541 void igb_configure_tx_ring(struct igb_adapter *adapter,
2542 struct igb_ring *ring)
2544 struct e1000_hw *hw = &adapter->hw;
2545 u32 txdctl;
2546 u64 tdba = ring->dma;
2547 int reg_idx = ring->reg_idx;
2549 /* disable the queue */
2550 txdctl = rd32(E1000_TXDCTL(reg_idx));
2551 wr32(E1000_TXDCTL(reg_idx),
2552 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2553 wrfl();
2554 mdelay(10);
2556 wr32(E1000_TDLEN(reg_idx),
2557 ring->count * sizeof(union e1000_adv_tx_desc));
2558 wr32(E1000_TDBAL(reg_idx),
2559 tdba & 0x00000000ffffffffULL);
2560 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2562 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2563 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2564 writel(0, ring->head);
2565 writel(0, ring->tail);
2567 txdctl |= IGB_TX_PTHRESH;
2568 txdctl |= IGB_TX_HTHRESH << 8;
2569 txdctl |= IGB_TX_WTHRESH << 16;
2571 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2572 wr32(E1000_TXDCTL(reg_idx), txdctl);
2576 * igb_configure_tx - Configure transmit Unit after Reset
2577 * @adapter: board private structure
2579 * Configure the Tx unit of the MAC after a reset.
2581 static void igb_configure_tx(struct igb_adapter *adapter)
2583 int i;
2585 for (i = 0; i < adapter->num_tx_queues; i++)
2586 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2590 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2591 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2593 * Returns 0 on success, negative on failure
2595 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2597 struct device *dev = rx_ring->dev;
2598 int size, desc_len;
2600 size = sizeof(struct igb_buffer) * rx_ring->count;
2601 rx_ring->buffer_info = vzalloc(size);
2602 if (!rx_ring->buffer_info)
2603 goto err;
2605 desc_len = sizeof(union e1000_adv_rx_desc);
2607 /* Round up to nearest 4K */
2608 rx_ring->size = rx_ring->count * desc_len;
2609 rx_ring->size = ALIGN(rx_ring->size, 4096);
2611 rx_ring->desc = dma_alloc_coherent(dev,
2612 rx_ring->size,
2613 &rx_ring->dma,
2614 GFP_KERNEL);
2616 if (!rx_ring->desc)
2617 goto err;
2619 rx_ring->next_to_clean = 0;
2620 rx_ring->next_to_use = 0;
2622 return 0;
2624 err:
2625 vfree(rx_ring->buffer_info);
2626 rx_ring->buffer_info = NULL;
2627 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2628 " ring\n");
2629 return -ENOMEM;
2633 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2634 * (Descriptors) for all queues
2635 * @adapter: board private structure
2637 * Return 0 on success, negative on failure
2639 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2641 struct pci_dev *pdev = adapter->pdev;
2642 int i, err = 0;
2644 for (i = 0; i < adapter->num_rx_queues; i++) {
2645 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2646 if (err) {
2647 dev_err(&pdev->dev,
2648 "Allocation for Rx Queue %u failed\n", i);
2649 for (i--; i >= 0; i--)
2650 igb_free_rx_resources(adapter->rx_ring[i]);
2651 break;
2655 return err;
2659 * igb_setup_mrqc - configure the multiple receive queue control registers
2660 * @adapter: Board private structure
2662 static void igb_setup_mrqc(struct igb_adapter *adapter)
2664 struct e1000_hw *hw = &adapter->hw;
2665 u32 mrqc, rxcsum;
2666 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2667 union e1000_reta {
2668 u32 dword;
2669 u8 bytes[4];
2670 } reta;
2671 static const u8 rsshash[40] = {
2672 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2673 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2674 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2675 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2677 /* Fill out hash function seeds */
2678 for (j = 0; j < 10; j++) {
2679 u32 rsskey = rsshash[(j * 4)];
2680 rsskey |= rsshash[(j * 4) + 1] << 8;
2681 rsskey |= rsshash[(j * 4) + 2] << 16;
2682 rsskey |= rsshash[(j * 4) + 3] << 24;
2683 array_wr32(E1000_RSSRK(0), j, rsskey);
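/*
 * Example of the little-endian packing above: the first RSSRK dword is
 * built from rsshash[0..3] as
 * 0x6d | 0x5a << 8 | 0x56 << 16 | 0xda << 24 = 0xda565a6d.
 */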
2686 num_rx_queues = adapter->rss_queues;
2688 if (adapter->vfs_allocated_count) {
2689 /* 82575 and 82576 support 2 RSS queues for VMDq */
2690 switch (hw->mac.type) {
2691 case e1000_i350:
2692 case e1000_82580:
2693 num_rx_queues = 1;
2694 shift = 0;
2695 break;
2696 case e1000_82576:
2697 shift = 3;
2698 num_rx_queues = 2;
2699 break;
2700 case e1000_82575:
2701 shift = 2;
2702 shift2 = 6;
2703 default:
2704 break;
2706 } else {
2707 if (hw->mac.type == e1000_82575)
2708 shift = 6;
2711 for (j = 0; j < (32 * 4); j++) {
2712 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2713 if (shift2)
2714 reta.bytes[j & 3] |= num_rx_queues << shift2;
2715 if ((j & 3) == 3)
2716 wr32(E1000_RETA(j >> 2), reta.dword);
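/*
 * Example of the redirection table built above: with no VFs and
 * num_rx_queues = 4 (shift = 0), the 128 RETA entries cycle
 * 0,1,2,3,0,1,2,3,... so consecutive RSS hash buckets are spread evenly
 * across the four queues.
 */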
2720 * Disable raw packet checksumming so that RSS hash is placed in
2721 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2722 * offloads as they are enabled by default
2724 rxcsum = rd32(E1000_RXCSUM);
2725 rxcsum |= E1000_RXCSUM_PCSD;
2727 if (adapter->hw.mac.type >= e1000_82576)
2728 /* Enable Receive Checksum Offload for SCTP */
2729 rxcsum |= E1000_RXCSUM_CRCOFL;
2731 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2732 wr32(E1000_RXCSUM, rxcsum);
2734 /* If VMDq is enabled then we set the appropriate mode for that, else
2735 * we default to RSS so that an RSS hash is calculated per packet even
2736 * if we are only using one queue */
2737 if (adapter->vfs_allocated_count) {
2738 if (hw->mac.type > e1000_82575) {
2739 /* Set the default pool for the PF's first queue */
2740 u32 vtctl = rd32(E1000_VT_CTL);
2741 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2742 E1000_VT_CTL_DISABLE_DEF_POOL);
2743 vtctl |= adapter->vfs_allocated_count <<
2744 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2745 wr32(E1000_VT_CTL, vtctl);
2747 if (adapter->rss_queues > 1)
2748 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2749 else
2750 mrqc = E1000_MRQC_ENABLE_VMDQ;
2751 } else {
2752 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2754 igb_vmm_control(adapter);
2757 * Generate RSS hash based on TCP port numbers and/or
2758 * IPv4/v6 src and dst addresses since UDP cannot be
2759 * hashed reliably due to IP fragmentation
2761 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2762 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2763 E1000_MRQC_RSS_FIELD_IPV6 |
2764 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2765 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2767 wr32(E1000_MRQC, mrqc);
2771 * igb_setup_rctl - configure the receive control registers
2772 * @adapter: Board private structure
2774 void igb_setup_rctl(struct igb_adapter *adapter)
2776 struct e1000_hw *hw = &adapter->hw;
2777 u32 rctl;
2779 rctl = rd32(E1000_RCTL);
2781 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2782 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2784 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2785 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2788 * enable stripping of CRC. It's unlikely this will break BMC
2789 * redirection as it did with e1000. Newer features require
2790 * that the HW strips the CRC.
2792 rctl |= E1000_RCTL_SECRC;
2794 /* disable store bad packets and clear size bits. */
2795 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2797 /* enable LPE to prevent packets larger than max_frame_size */
2798 rctl |= E1000_RCTL_LPE;
2800 /* disable queue 0 to prevent tail write w/o re-config */
2801 wr32(E1000_RXDCTL(0), 0);
2803 /* Attention!!! For SR-IOV PF driver operations you must enable
2804 * queue drop for all VF and PF queues to prevent head of line blocking
2805 * if an untrusted VF does not provide descriptors to hardware.
2807 if (adapter->vfs_allocated_count) {
2808 /* set all queue drop enable bits */
2809 wr32(E1000_QDE, ALL_QUEUES);
2812 wr32(E1000_RCTL, rctl);
2815 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2816 int vfn)
2818 struct e1000_hw *hw = &adapter->hw;
2819 u32 vmolr;
2821 /* if this is a VF rather than the PF, check whether that VF has
2822 * vlans enabled and, if so, increase the size to allow for the tag */
2823 if (vfn < adapter->vfs_allocated_count &&
2824 adapter->vf_data[vfn].vlans_enabled)
2825 size += VLAN_TAG_SIZE;
2827 vmolr = rd32(E1000_VMOLR(vfn));
2828 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2829 vmolr |= size | E1000_VMOLR_LPE;
2830 wr32(E1000_VMOLR(vfn), vmolr);
2832 return 0;
2836 * igb_rlpml_set - set maximum receive packet size
2837 * @adapter: board private structure
2839 * Configure maximum receivable packet size.
2841 static void igb_rlpml_set(struct igb_adapter *adapter)
2843 u32 max_frame_size = adapter->max_frame_size;
2844 struct e1000_hw *hw = &adapter->hw;
2845 u16 pf_id = adapter->vfs_allocated_count;
2847 if (adapter->vlgrp)
2848 max_frame_size += VLAN_TAG_SIZE;
2850 /* if vfs are enabled we set RLPML to the largest possible request
2851 * size and set the VMOLR RLPML to the size we need */
2852 if (pf_id) {
2853 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2854 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2857 wr32(E1000_RLPML, max_frame_size);
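/*
 * Example (illustrative): with a 1500 byte MTU, max_frame_size is
 * 1500 + ETH_HLEN + ETH_FCS_LEN = 1518, plus 4 more if a vlan group is
 * registered. With VFs enabled, RLPML itself is opened up to
 * MAX_JUMBO_FRAME_SIZE and the 1518/1522 limit is enforced per pool through
 * VMOLR instead.
 */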
2860 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2861 int vfn, bool aupe)
2863 struct e1000_hw *hw = &adapter->hw;
2864 u32 vmolr;
2867 * This register exists only on 82576 and newer, so on older hardware
2868 * we should exit and do nothing
2870 if (hw->mac.type < e1000_82576)
2871 return;
2873 vmolr = rd32(E1000_VMOLR(vfn));
2874 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2875 if (aupe)
2876 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2877 else
2878 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2880 /* clear all bits that might not be set */
2881 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2883 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2884 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2886 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2887 * multicast packets
2889 if (vfn <= adapter->vfs_allocated_count)
2890 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2892 wr32(E1000_VMOLR(vfn), vmolr);
2896 * igb_configure_rx_ring - Configure a receive ring after Reset
2897 * @adapter: board private structure
2898 * @ring: receive ring to be configured
2900 * Configure the Rx unit of the MAC after a reset.
2902 void igb_configure_rx_ring(struct igb_adapter *adapter,
2903 struct igb_ring *ring)
2905 struct e1000_hw *hw = &adapter->hw;
2906 u64 rdba = ring->dma;
2907 int reg_idx = ring->reg_idx;
2908 u32 srrctl, rxdctl;
2910 /* disable the queue */
2911 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2912 wr32(E1000_RXDCTL(reg_idx),
2913 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2915 /* Set DMA base address registers */
2916 wr32(E1000_RDBAL(reg_idx),
2917 rdba & 0x00000000ffffffffULL);
2918 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2919 wr32(E1000_RDLEN(reg_idx),
2920 ring->count * sizeof(union e1000_adv_rx_desc));
2922 /* initialize head and tail */
2923 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2924 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2925 writel(0, ring->head);
2926 writel(0, ring->tail);
2928 /* set descriptor configuration */
2929 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2930 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2931 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2932 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2933 srrctl |= IGB_RXBUFFER_16384 >>
2934 E1000_SRRCTL_BSIZEPKT_SHIFT;
2935 #else
2936 srrctl |= (PAGE_SIZE / 2) >>
2937 E1000_SRRCTL_BSIZEPKT_SHIFT;
2938 #endif
2939 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2940 } else {
2941 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2942 E1000_SRRCTL_BSIZEPKT_SHIFT;
2943 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2945 if (hw->mac.type == e1000_82580)
2946 srrctl |= E1000_SRRCTL_TIMESTAMP;
2947 /* Only set Drop Enable if we are supporting multiple queues */
2948 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2949 srrctl |= E1000_SRRCTL_DROP_EN;
2951 wr32(E1000_SRRCTL(reg_idx), srrctl);
2953 /* set filtering for VMDQ pools */
2954 igb_set_vmolr(adapter, reg_idx & 0x7, true);
2956 /* enable receive descriptor fetching */
2957 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2958 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2959 rxdctl &= 0xFFF00000;
2960 rxdctl |= IGB_RX_PTHRESH;
2961 rxdctl |= IGB_RX_HTHRESH << 8;
2962 rxdctl |= IGB_RX_WTHRESH << 16;
2963 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2967 * igb_configure_rx - Configure receive Unit after Reset
2968 * @adapter: board private structure
2970 * Configure the Rx unit of the MAC after a reset.
2972 static void igb_configure_rx(struct igb_adapter *adapter)
2974 int i;
2976 /* set UTA to appropriate mode */
2977 igb_set_uta(adapter);
2979 /* set the correct pool for the PF default MAC address in entry 0 */
2980 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2981 adapter->vfs_allocated_count);
2983 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2984 * the Base and Length of the Rx Descriptor Ring */
2985 for (i = 0; i < adapter->num_rx_queues; i++)
2986 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2990 * igb_free_tx_resources - Free Tx Resources per Queue
2991 * @tx_ring: Tx descriptor ring for a specific queue
2993 * Free all transmit software resources
2995 void igb_free_tx_resources(struct igb_ring *tx_ring)
2997 igb_clean_tx_ring(tx_ring);
2999 vfree(tx_ring->buffer_info);
3000 tx_ring->buffer_info = NULL;
3002 /* if not set, then don't free */
3003 if (!tx_ring->desc)
3004 return;
3006 dma_free_coherent(tx_ring->dev, tx_ring->size,
3007 tx_ring->desc, tx_ring->dma);
3009 tx_ring->desc = NULL;
3013 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3014 * @adapter: board private structure
3016 * Free all transmit software resources
3018 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3020 int i;
3022 for (i = 0; i < adapter->num_tx_queues; i++)
3023 igb_free_tx_resources(adapter->tx_ring[i]);
3026 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3027 struct igb_buffer *buffer_info)
3029 if (buffer_info->dma) {
3030 if (buffer_info->mapped_as_page)
3031 dma_unmap_page(tx_ring->dev,
3032 buffer_info->dma,
3033 buffer_info->length,
3034 DMA_TO_DEVICE);
3035 else
3036 dma_unmap_single(tx_ring->dev,
3037 buffer_info->dma,
3038 buffer_info->length,
3039 DMA_TO_DEVICE);
3040 buffer_info->dma = 0;
3042 if (buffer_info->skb) {
3043 dev_kfree_skb_any(buffer_info->skb);
3044 buffer_info->skb = NULL;
3046 buffer_info->time_stamp = 0;
3047 buffer_info->length = 0;
3048 buffer_info->next_to_watch = 0;
3049 buffer_info->mapped_as_page = false;
3053 * igb_clean_tx_ring - Free Tx Buffers
3054 * @tx_ring: ring to be cleaned
3056 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3058 struct igb_buffer *buffer_info;
3059 unsigned long size;
3060 unsigned int i;
3062 if (!tx_ring->buffer_info)
3063 return;
3064 /* Free all the Tx ring sk_buffs */
3066 for (i = 0; i < tx_ring->count; i++) {
3067 buffer_info = &tx_ring->buffer_info[i];
3068 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3071 size = sizeof(struct igb_buffer) * tx_ring->count;
3072 memset(tx_ring->buffer_info, 0, size);
3074 /* Zero out the descriptor ring */
3075 memset(tx_ring->desc, 0, tx_ring->size);
3077 tx_ring->next_to_use = 0;
3078 tx_ring->next_to_clean = 0;
3082 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3083 * @adapter: board private structure
3085 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3087 int i;
3089 for (i = 0; i < adapter->num_tx_queues; i++)
3090 igb_clean_tx_ring(adapter->tx_ring[i]);
3094 * igb_free_rx_resources - Free Rx Resources
3095 * @rx_ring: ring to clean the resources from
3097 * Free all receive software resources
3099 void igb_free_rx_resources(struct igb_ring *rx_ring)
3101 igb_clean_rx_ring(rx_ring);
3103 vfree(rx_ring->buffer_info);
3104 rx_ring->buffer_info = NULL;
3106 /* if not set, then don't free */
3107 if (!rx_ring->desc)
3108 return;
3110 dma_free_coherent(rx_ring->dev, rx_ring->size,
3111 rx_ring->desc, rx_ring->dma);
3113 rx_ring->desc = NULL;
3117 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3118 * @adapter: board private structure
3120 * Free all receive software resources
3122 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3124 int i;
3126 for (i = 0; i < adapter->num_rx_queues; i++)
3127 igb_free_rx_resources(adapter->rx_ring[i]);
3131 * igb_clean_rx_ring - Free Rx Buffers per Queue
3132 * @rx_ring: ring to free buffers from
3134 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3136 struct igb_buffer *buffer_info;
3137 unsigned long size;
3138 unsigned int i;
3140 if (!rx_ring->buffer_info)
3141 return;
3143 /* Free all the Rx ring sk_buffs */
3144 for (i = 0; i < rx_ring->count; i++) {
3145 buffer_info = &rx_ring->buffer_info[i];
3146 if (buffer_info->dma) {
3147 dma_unmap_single(rx_ring->dev,
3148 buffer_info->dma,
3149 rx_ring->rx_buffer_len,
3150 DMA_FROM_DEVICE);
3151 buffer_info->dma = 0;
3154 if (buffer_info->skb) {
3155 dev_kfree_skb(buffer_info->skb);
3156 buffer_info->skb = NULL;
3158 if (buffer_info->page_dma) {
3159 dma_unmap_page(rx_ring->dev,
3160 buffer_info->page_dma,
3161 PAGE_SIZE / 2,
3162 DMA_FROM_DEVICE);
3163 buffer_info->page_dma = 0;
3165 if (buffer_info->page) {
3166 put_page(buffer_info->page);
3167 buffer_info->page = NULL;
3168 buffer_info->page_offset = 0;
3172 size = sizeof(struct igb_buffer) * rx_ring->count;
3173 memset(rx_ring->buffer_info, 0, size);
3175 /* Zero out the descriptor ring */
3176 memset(rx_ring->desc, 0, rx_ring->size);
3178 rx_ring->next_to_clean = 0;
3179 rx_ring->next_to_use = 0;
3183 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3184 * @adapter: board private structure
3186 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3188 int i;
3190 for (i = 0; i < adapter->num_rx_queues; i++)
3191 igb_clean_rx_ring(adapter->rx_ring[i]);
3195 * igb_set_mac - Change the Ethernet Address of the NIC
3196 * @netdev: network interface device structure
3197 * @p: pointer to an address structure
3199 * Returns 0 on success, negative on failure
3201 static int igb_set_mac(struct net_device *netdev, void *p)
3203 struct igb_adapter *adapter = netdev_priv(netdev);
3204 struct e1000_hw *hw = &adapter->hw;
3205 struct sockaddr *addr = p;
3207 if (!is_valid_ether_addr(addr->sa_data))
3208 return -EADDRNOTAVAIL;
3210 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3211 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3213 /* set the correct pool for the new PF MAC address in entry 0 */
3214 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3215 adapter->vfs_allocated_count);
3217 return 0;
3221 * igb_write_mc_addr_list - write multicast addresses to MTA
3222 * @netdev: network interface device structure
3224 * Writes multicast address list to the MTA hash table.
3225 * Returns: -ENOMEM on failure
3226 * 0 on no addresses written
3227 * X on writing X addresses to MTA
3229 static int igb_write_mc_addr_list(struct net_device *netdev)
3231 struct igb_adapter *adapter = netdev_priv(netdev);
3232 struct e1000_hw *hw = &adapter->hw;
3233 struct netdev_hw_addr *ha;
3234 u8 *mta_list;
3235 int i;
3237 if (netdev_mc_empty(netdev)) {
3238 /* nothing to program, so clear mc list */
3239 igb_update_mc_addr_list(hw, NULL, 0);
3240 igb_restore_vf_multicasts(adapter);
3241 return 0;
3244 mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3245 if (!mta_list)
3246 return -ENOMEM;
3248 /* The shared function expects a packed array of only addresses. */
3249 i = 0;
3250 netdev_for_each_mc_addr(ha, netdev)
3251 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3253 igb_update_mc_addr_list(hw, mta_list, i);
3254 kfree(mta_list);
3256 return netdev_mc_count(netdev);
3260 * igb_write_uc_addr_list - write unicast addresses to RAR table
3261 * @netdev: network interface device structure
3263 * Writes unicast address list to the RAR table.
3264 * Returns: -ENOMEM on failure/insufficient address space
3265 * 0 on no addresses written
3266 * X on writing X addresses to the RAR table
3268 static int igb_write_uc_addr_list(struct net_device *netdev)
3270 struct igb_adapter *adapter = netdev_priv(netdev);
3271 struct e1000_hw *hw = &adapter->hw;
3272 unsigned int vfn = adapter->vfs_allocated_count;
3273 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3274 int count = 0;
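/* The (vfn + 1) adjustment above reserves RAR entry 0 for the PF default
 * MAC plus, presumably, one entry per VF default MAC, so only the remaining
 * entries are available for additional unicast addresses.
 */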
3276 /* return ENOMEM indicating insufficient memory for addresses */
3277 if (netdev_uc_count(netdev) > rar_entries)
3278 return -ENOMEM;
3280 if (!netdev_uc_empty(netdev) && rar_entries) {
3281 struct netdev_hw_addr *ha;
3283 netdev_for_each_uc_addr(ha, netdev) {
3284 if (!rar_entries)
3285 break;
3286 igb_rar_set_qsel(adapter, ha->addr,
3287 rar_entries--,
3288 vfn);
3289 count++;
3292 /* write the addresses in reverse order to avoid write combining */
3293 for (; rar_entries > 0 ; rar_entries--) {
3294 wr32(E1000_RAH(rar_entries), 0);
3295 wr32(E1000_RAL(rar_entries), 0);
3297 wrfl();
3299 return count;
3303 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3304 * @netdev: network interface device structure
3306 * The set_rx_mode entry point is called whenever the unicast or multicast
3307 * address lists or the network interface flags are updated. This routine is
3308 * responsible for configuring the hardware for proper unicast, multicast,
3309 * promiscuous mode, and all-multi behavior.
3311 static void igb_set_rx_mode(struct net_device *netdev)
3313 struct igb_adapter *adapter = netdev_priv(netdev);
3314 struct e1000_hw *hw = &adapter->hw;
3315 unsigned int vfn = adapter->vfs_allocated_count;
3316 u32 rctl, vmolr = 0;
3317 int count;
3319 /* Check for Promiscuous and All Multicast modes */
3320 rctl = rd32(E1000_RCTL);
3322 /* clear the affected bits */
3323 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3325 if (netdev->flags & IFF_PROMISC) {
3326 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3327 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3328 } else {
3329 if (netdev->flags & IFF_ALLMULTI) {
3330 rctl |= E1000_RCTL_MPE;
3331 vmolr |= E1000_VMOLR_MPME;
3332 } else {
3334 * Write addresses to the MTA, if the attempt fails
3335 * then we should just turn on promiscuous mode so
3336 * that we can at least receive multicast traffic
3338 count = igb_write_mc_addr_list(netdev);
3339 if (count < 0) {
3340 rctl |= E1000_RCTL_MPE;
3341 vmolr |= E1000_VMOLR_MPME;
3342 } else if (count) {
3343 vmolr |= E1000_VMOLR_ROMPE;
3347 * Write addresses to available RAR registers, if there is not
3348 * sufficient space to store all the addresses then enable
3349 * unicast promiscuous mode
3351 count = igb_write_uc_addr_list(netdev);
3352 if (count < 0) {
3353 rctl |= E1000_RCTL_UPE;
3354 vmolr |= E1000_VMOLR_ROPE;
3356 rctl |= E1000_RCTL_VFE;
3358 wr32(E1000_RCTL, rctl);
3361 * In order to support SR-IOV and eventually VMDq it is necessary to set
3362 * the VMOLR to enable the appropriate modes. Without this workaround
3363 * we will have issues with VLAN tag stripping not being done for frames
3364 * that are only arriving because we are the default pool
3366 if (hw->mac.type < e1000_82576)
3367 return;
3369 vmolr |= rd32(E1000_VMOLR(vfn)) &
3370 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3371 wr32(E1000_VMOLR(vfn), vmolr);
3372 igb_restore_vf_multicasts(adapter);
3375 static void igb_check_wvbr(struct igb_adapter *adapter)
3377 struct e1000_hw *hw = &adapter->hw;
3378 u32 wvbr = 0;
3380 switch (hw->mac.type) {
3381 case e1000_82576:
3382 case e1000_i350:
3383 if (!(wvbr = rd32(E1000_WVBR)))
3384 return;
3385 break;
3386 default:
3387 break;
3390 adapter->wvbr |= wvbr;
3393 #define IGB_STAGGERED_QUEUE_OFFSET 8
3395 static void igb_spoof_check(struct igb_adapter *adapter)
3397 int j;
3399 if (!adapter->wvbr)
3400 return;
3402 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3403 if (adapter->wvbr & (1 << j) ||
3404 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3405 dev_warn(&adapter->pdev->dev,
3406 "Spoof event(s) detected on VF %d\n", j);
3407 adapter->wvbr &=
3408 ~((1 << j) |
3409 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3414 /* Need to wait a few seconds after link up to get diagnostic information from
3415 * the phy */
3416 static void igb_update_phy_info(unsigned long data)
3418 struct igb_adapter *adapter = (struct igb_adapter *) data;
3419 igb_get_phy_info(&adapter->hw);
3423 * igb_has_link - check shared code for link and determine up/down
3424 * @adapter: pointer to driver private info
3426 bool igb_has_link(struct igb_adapter *adapter)
3428 struct e1000_hw *hw = &adapter->hw;
3429 bool link_active = false;
3430 s32 ret_val = 0;
3432 /* get_link_status is set on LSC (link status) interrupt or
3433 * rx sequence error interrupt. It stays set, and link is
3434 * reported as down, until e1000_check_for_link establishes
3435 * link; this applies to copper adapters ONLY
3437 switch (hw->phy.media_type) {
3438 case e1000_media_type_copper:
3439 if (hw->mac.get_link_status) {
3440 ret_val = hw->mac.ops.check_for_link(hw);
3441 link_active = !hw->mac.get_link_status;
3442 } else {
3443 link_active = true;
3445 break;
3446 case e1000_media_type_internal_serdes:
3447 ret_val = hw->mac.ops.check_for_link(hw);
3448 link_active = hw->mac.serdes_has_link;
3449 break;
3450 default:
3451 case e1000_media_type_unknown:
3452 break;
3455 return link_active;
3459 * igb_watchdog - Timer Call-back
3460 * @data: pointer to adapter cast into an unsigned long
3462 static void igb_watchdog(unsigned long data)
3464 struct igb_adapter *adapter = (struct igb_adapter *)data;
3465 /* Do the rest outside of interrupt context */
3466 schedule_work(&adapter->watchdog_task);
3469 static void igb_watchdog_task(struct work_struct *work)
3471 struct igb_adapter *adapter = container_of(work,
3472 struct igb_adapter,
3473 watchdog_task);
3474 struct e1000_hw *hw = &adapter->hw;
3475 struct net_device *netdev = adapter->netdev;
3476 u32 link;
3477 int i;
3479 link = igb_has_link(adapter);
3480 if (link) {
3481 if (!netif_carrier_ok(netdev)) {
3482 u32 ctrl;
3483 hw->mac.ops.get_speed_and_duplex(hw,
3484 &adapter->link_speed,
3485 &adapter->link_duplex);
3487 ctrl = rd32(E1000_CTRL);
3488 /* Link status message must follow this format */
3489 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3490 "Flow Control: %s\n",
3491 netdev->name,
3492 adapter->link_speed,
3493 adapter->link_duplex == FULL_DUPLEX ?
3494 "Full Duplex" : "Half Duplex",
3495 ((ctrl & E1000_CTRL_TFCE) &&
3496 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3497 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3498 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3500 /* adjust timeout factor according to speed/duplex */
3501 adapter->tx_timeout_factor = 1;
3502 switch (adapter->link_speed) {
3503 case SPEED_10:
3504 adapter->tx_timeout_factor = 14;
3505 break;
3506 case SPEED_100:
3507 /* maybe add some timeout factor ? */
3508 break;
3511 netif_carrier_on(netdev);
3513 igb_ping_all_vfs(adapter);
3515 /* link state has changed, schedule phy info update */
3516 if (!test_bit(__IGB_DOWN, &adapter->state))
3517 mod_timer(&adapter->phy_info_timer,
3518 round_jiffies(jiffies + 2 * HZ));
3520 } else {
3521 if (netif_carrier_ok(netdev)) {
3522 adapter->link_speed = 0;
3523 adapter->link_duplex = 0;
3524 /* Link status message must follow this format */
3525 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3526 netdev->name);
3527 netif_carrier_off(netdev);
3529 igb_ping_all_vfs(adapter);
3531 /* link state has changed, schedule phy info update */
3532 if (!test_bit(__IGB_DOWN, &adapter->state))
3533 mod_timer(&adapter->phy_info_timer,
3534 round_jiffies(jiffies + 2 * HZ));
3538 spin_lock(&adapter->stats64_lock);
3539 igb_update_stats(adapter, &adapter->stats64);
3540 spin_unlock(&adapter->stats64_lock);
3542 for (i = 0; i < adapter->num_tx_queues; i++) {
3543 struct igb_ring *tx_ring = adapter->tx_ring[i];
3544 if (!netif_carrier_ok(netdev)) {
3545 /* We've lost link, so the controller stops DMA,
3546 * but we've got queued Tx work that's never going
3547 * to get done, so reset controller to flush Tx.
3548 * (Do the reset outside of interrupt context). */
3549 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3550 adapter->tx_timeout_count++;
3551 schedule_work(&adapter->reset_task);
3552 /* return immediately since reset is imminent */
3553 return;
3557 /* Force detection of hung controller every watchdog period */
3558 tx_ring->detect_tx_hung = true;
3561 /* Cause software interrupt to ensure rx ring is cleaned */
3562 if (adapter->msix_entries) {
3563 u32 eics = 0;
3564 for (i = 0; i < adapter->num_q_vectors; i++) {
3565 struct igb_q_vector *q_vector = adapter->q_vector[i];
3566 eics |= q_vector->eims_value;
3568 wr32(E1000_EICS, eics);
3569 } else {
3570 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3573 igb_spoof_check(adapter);
3575 /* Reset the timer */
3576 if (!test_bit(__IGB_DOWN, &adapter->state))
3577 mod_timer(&adapter->watchdog_timer,
3578 round_jiffies(jiffies + 2 * HZ));
3581 enum latency_range {
3582 lowest_latency = 0,
3583 low_latency = 1,
3584 bulk_latency = 2,
3585 latency_invalid = 255
3589 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3591 * Stores a new ITR value based strictly on packet size. This
3592 * algorithm is less sophisticated than that used in igb_update_itr,
3593 * due to the difficulty of synchronizing statistics across multiple
3594 * receive rings. The divisors and thresholds used by this function
3595 * were determined based on theoretical maximum wire speed and testing
3596 * data, in order to minimize response time while increasing bulk
3597 * throughput.
3598 * This functionality is controlled by the InterruptThrottleRate module
3599 * parameter (see igb_param.c)
3600 * NOTE: This function is called only when operating in a multiqueue
3601 * receive environment.
3602 * @q_vector: pointer to q_vector
3604 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3606 int new_val = q_vector->itr_val;
3607 int avg_wire_size = 0;
3608 struct igb_adapter *adapter = q_vector->adapter;
3609 struct igb_ring *ring;
3610 unsigned int packets;
3612 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3613 * ints/sec - ITR timer value of 120 ticks.
3615 if (adapter->link_speed != SPEED_1000) {
3616 new_val = 976;
3617 goto set_itr_val;
3620 ring = q_vector->rx_ring;
3621 if (ring) {
3622 packets = ACCESS_ONCE(ring->total_packets);
3624 if (packets)
3625 avg_wire_size = ring->total_bytes / packets;
3628 ring = q_vector->tx_ring;
3629 if (ring) {
3630 packets = ACCESS_ONCE(ring->total_packets);
3632 if (packets)
3633 avg_wire_size = max_t(u32, avg_wire_size,
3634 ring->total_bytes / packets);
3637 /* if avg_wire_size isn't set no work was done */
3638 if (!avg_wire_size)
3639 goto clear_counts;
3641 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3642 avg_wire_size += 24;
3644 /* Don't starve jumbo frames */
3645 avg_wire_size = min(avg_wire_size, 3000);
3647 /* Give a little boost to mid-size frames */
3648 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3649 new_val = avg_wire_size / 3;
3650 else
3651 new_val = avg_wire_size / 2;
3653 /* when in itr mode 3 do not exceed 20K ints/sec */
3654 if (adapter->rx_itr_setting == 3 && new_val < 196)
3655 new_val = 196;
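/*
 * Worked example of the mapping above (illustrative): a stream of ~1500
 * byte frames gives avg_wire_size ~1524 after the +24 adjustment, which is
 * outside the 300-1200 "boost" window, so new_val = 1524 / 2 = 762. Taking
 * one ITR unit as roughly 256 ns (consistent with 980 ~ 4,000 ints/sec and
 * 196 ~ 20,000 ints/sec in igb_set_itr below), that is about a 195 us
 * interval, i.e. roughly 5,000 interrupts per second.
 */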
3657 set_itr_val:
3658 if (new_val != q_vector->itr_val) {
3659 q_vector->itr_val = new_val;
3660 q_vector->set_itr = 1;
3662 clear_counts:
3663 if (q_vector->rx_ring) {
3664 q_vector->rx_ring->total_bytes = 0;
3665 q_vector->rx_ring->total_packets = 0;
3667 if (q_vector->tx_ring) {
3668 q_vector->tx_ring->total_bytes = 0;
3669 q_vector->tx_ring->total_packets = 0;
3674 * igb_update_itr - update the dynamic ITR value based on statistics
3675 * Stores a new ITR value based on packets and byte
3676 * counts during the last interrupt. The advantage of per interrupt
3677 * computation is faster updates and more accurate ITR for the current
3678 * traffic pattern. Constants in this function were computed
3679 * based on theoretical maximum wire speed and thresholds were set based
3680 * on testing data as well as attempting to minimize response time
3681 * while increasing bulk throughput.
3682 * this functionality is controlled by the InterruptThrottleRate module
3683 * parameter (see igb_param.c)
3684 * NOTE: These calculations are only valid when operating in a single-
3685 * queue environment.
3686 * @adapter: pointer to adapter
3687 * @itr_setting: current q_vector->itr_val
3688 * @packets: the number of packets during this measurement interval
3689 * @bytes: the number of bytes during this measurement interval
3691 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3692 int packets, int bytes)
3694 unsigned int retval = itr_setting;
3696 if (packets == 0)
3697 goto update_itr_done;
3699 switch (itr_setting) {
3700 case lowest_latency:
3701 /* handle TSO and jumbo frames */
3702 if (bytes/packets > 8000)
3703 retval = bulk_latency;
3704 else if ((packets < 5) && (bytes > 512))
3705 retval = low_latency;
3706 break;
3707 case low_latency: /* 50 usec aka 20000 ints/s */
3708 if (bytes > 10000) {
3709 /* this if handles the TSO accounting */
3710 if (bytes/packets > 8000) {
3711 retval = bulk_latency;
3712 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3713 retval = bulk_latency;
3714 } else if ((packets > 35)) {
3715 retval = lowest_latency;
3717 } else if (bytes/packets > 2000) {
3718 retval = bulk_latency;
3719 } else if (packets <= 2 && bytes < 512) {
3720 retval = lowest_latency;
3722 break;
3723 case bulk_latency: /* 250 usec aka 4000 ints/s */
3724 if (bytes > 25000) {
3725 if (packets > 35)
3726 retval = low_latency;
3727 } else if (bytes < 1500) {
3728 retval = low_latency;
3730 break;
3733 update_itr_done:
3734 return retval;
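/*
 * Example walk through the thresholds above: starting from low_latency,
 * an interval of 20 packets totalling 30000 bytes has bytes/packets = 1500;
 * since bytes > 10000 and bytes/packets > 1200 the new setting is
 * bulk_latency. A later quiet interval of 2 packets / 120 bytes falls under
 * the 1500 byte floor in the bulk_latency case and moves back to
 * low_latency.
 */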
3737 static void igb_set_itr(struct igb_adapter *adapter)
3739 struct igb_q_vector *q_vector = adapter->q_vector[0];
3740 u16 current_itr;
3741 u32 new_itr = q_vector->itr_val;
3743 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3744 if (adapter->link_speed != SPEED_1000) {
3745 current_itr = 0;
3746 new_itr = 4000;
3747 goto set_itr_now;
3750 adapter->rx_itr = igb_update_itr(adapter,
3751 adapter->rx_itr,
3752 q_vector->rx_ring->total_packets,
3753 q_vector->rx_ring->total_bytes);
3755 adapter->tx_itr = igb_update_itr(adapter,
3756 adapter->tx_itr,
3757 q_vector->tx_ring->total_packets,
3758 q_vector->tx_ring->total_bytes);
3759 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3761 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3762 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3763 current_itr = low_latency;
3765 switch (current_itr) {
3766 /* counts and packets in update_itr are dependent on these numbers */
3767 case lowest_latency:
3768 new_itr = 56; /* aka 70,000 ints/sec */
3769 break;
3770 case low_latency:
3771 new_itr = 196; /* aka 20,000 ints/sec */
3772 break;
3773 case bulk_latency:
3774 new_itr = 980; /* aka 4,000 ints/sec */
3775 break;
3776 default:
3777 break;
3780 set_itr_now:
3781 q_vector->rx_ring->total_bytes = 0;
3782 q_vector->rx_ring->total_packets = 0;
3783 q_vector->tx_ring->total_bytes = 0;
3784 q_vector->tx_ring->total_packets = 0;
3786 if (new_itr != q_vector->itr_val) {
3787 /* this attempts to bias the interrupt rate towards Bulk
3788 * by adding intermediate steps when interrupt rate is
3789 * increasing */
3790 new_itr = new_itr > q_vector->itr_val ?
3791 max((new_itr * q_vector->itr_val) /
3792 (new_itr + (q_vector->itr_val >> 2)),
3793 new_itr) :
3794 new_itr;
3795 /* Don't write the value here; it resets the adapter's
3796 * internal timer, and causes us to delay far longer than
3797 * we should between interrupts. Instead, we write the ITR
3798 * value at the beginning of the next interrupt so the timing
3799 * ends up being correct.
3801 q_vector->itr_val = new_itr;
3802 q_vector->set_itr = 1;
3806 #define IGB_TX_FLAGS_CSUM 0x00000001
3807 #define IGB_TX_FLAGS_VLAN 0x00000002
3808 #define IGB_TX_FLAGS_TSO 0x00000004
3809 #define IGB_TX_FLAGS_IPV4 0x00000008
3810 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3811 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3812 #define IGB_TX_FLAGS_VLAN_SHIFT 16
3814 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3815 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3817 struct e1000_adv_tx_context_desc *context_desc;
3818 unsigned int i;
3819 int err;
3820 struct igb_buffer *buffer_info;
3821 u32 info = 0, tu_cmd = 0;
3822 u32 mss_l4len_idx;
3823 u8 l4len;
3825 if (skb_header_cloned(skb)) {
3826 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3827 if (err)
3828 return err;
3831 l4len = tcp_hdrlen(skb);
3832 *hdr_len += l4len;
3834 if (skb->protocol == htons(ETH_P_IP)) {
3835 struct iphdr *iph = ip_hdr(skb);
3836 iph->tot_len = 0;
3837 iph->check = 0;
3838 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3839 iph->daddr, 0,
3840 IPPROTO_TCP,
3841 0);
3842 } else if (skb_is_gso_v6(skb)) {
3843 ipv6_hdr(skb)->payload_len = 0;
3844 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3845 &ipv6_hdr(skb)->daddr,
3846 0, IPPROTO_TCP, 0);
3849 i = tx_ring->next_to_use;
3851 buffer_info = &tx_ring->buffer_info[i];
3852 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3853 /* VLAN MACLEN IPLEN */
3854 if (tx_flags & IGB_TX_FLAGS_VLAN)
3855 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3856 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3857 *hdr_len += skb_network_offset(skb);
3858 info |= skb_network_header_len(skb);
3859 *hdr_len += skb_network_header_len(skb);
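/* Worked example (illustrative, not in the original source): for a TSO
 * TCP/IPv4 frame on plain Ethernet, *hdr_len accumulates to
 * 32 (tcp_hdrlen with timestamps) + 14 (MAC header) + 20 (IPv4 header)
 * = 66 bytes, the header block replicated in front of every segment. */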
3860 context_desc->vlan_macip_lens = cpu_to_le32(info);
3862 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3863 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3865 if (skb->protocol == htons(ETH_P_IP))
3866 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3867 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3869 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3871 /* MSS L4LEN IDX */
3872 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3873 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3875 /* For 82575, context index must be unique per ring. */
3876 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3877 mss_l4len_idx |= tx_ring->reg_idx << 4;
3879 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3880 context_desc->seqnum_seed = 0;
3882 buffer_info->time_stamp = jiffies;
3883 buffer_info->next_to_watch = i;
3884 buffer_info->dma = 0;
3885 i++;
3886 if (i == tx_ring->count)
3887 i = 0;
3889 tx_ring->next_to_use = i;
3891 return true;
3894 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3895 struct sk_buff *skb, u32 tx_flags)
3897 struct e1000_adv_tx_context_desc *context_desc;
3898 struct device *dev = tx_ring->dev;
3899 struct igb_buffer *buffer_info;
3900 u32 info = 0, tu_cmd = 0;
3901 unsigned int i;
3903 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3904 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3905 i = tx_ring->next_to_use;
3906 buffer_info = &tx_ring->buffer_info[i];
3907 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3909 if (tx_flags & IGB_TX_FLAGS_VLAN)
3910 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3912 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3913 if (skb->ip_summed == CHECKSUM_PARTIAL)
3914 info |= skb_network_header_len(skb);
3916 context_desc->vlan_macip_lens = cpu_to_le32(info);
3918 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3920 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3921 __be16 protocol;
3923 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3924 const struct vlan_ethhdr *vhdr =
3925 (const struct vlan_ethhdr*)skb->data;
3927 protocol = vhdr->h_vlan_encapsulated_proto;
3928 } else {
3929 protocol = skb->protocol;
3932 switch (protocol) {
3933 case cpu_to_be16(ETH_P_IP):
3934 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3935 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3936 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3937 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3938 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3939 break;
3940 case cpu_to_be16(ETH_P_IPV6):
3941 /* XXX what about other V6 headers?? */
3942 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3943 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3944 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3945 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3946 break;
3947 default:
3948 if (unlikely(net_ratelimit()))
3949 dev_warn(dev,
3950 "partial checksum but proto=%x!\n",
3951 skb->protocol);
3952 break;
3956 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3957 context_desc->seqnum_seed = 0;
3958 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3959 context_desc->mss_l4len_idx =
3960 cpu_to_le32(tx_ring->reg_idx << 4);
3962 buffer_info->time_stamp = jiffies;
3963 buffer_info->next_to_watch = i;
3964 buffer_info->dma = 0;
3966 i++;
3967 if (i == tx_ring->count)
3968 i = 0;
3969 tx_ring->next_to_use = i;
3971 return true;
3973 return false;
3976 #define IGB_MAX_TXD_PWR 16
3977 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
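/* Note for clarity (not in the original source): IGB_MAX_DATA_PER_TXD is
 * 1 << 16 = 65536, and igb_tx_map_adv() below BUG()s if the linear area or
 * any fragment reaches that size, so each data descriptor carries strictly
 * less than 64 KB. */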
3979 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3980 unsigned int first)
3982 struct igb_buffer *buffer_info;
3983 struct device *dev = tx_ring->dev;
3984 unsigned int hlen = skb_headlen(skb);
3985 unsigned int count = 0, i;
3986 unsigned int f;
3987 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3989 i = tx_ring->next_to_use;
3991 buffer_info = &tx_ring->buffer_info[i];
3992 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3993 buffer_info->length = hlen;
3994 /* set time_stamp *before* dma to help avoid a possible race */
3995 buffer_info->time_stamp = jiffies;
3996 buffer_info->next_to_watch = i;
3997 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3998 DMA_TO_DEVICE);
3999 if (dma_mapping_error(dev, buffer_info->dma))
4000 goto dma_error;
4002 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4003 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4004 unsigned int len = frag->size;
4006 count++;
4007 i++;
4008 if (i == tx_ring->count)
4009 i = 0;
4011 buffer_info = &tx_ring->buffer_info[i];
4012 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4013 buffer_info->length = len;
4014 buffer_info->time_stamp = jiffies;
4015 buffer_info->next_to_watch = i;
4016 buffer_info->mapped_as_page = true;
4017 buffer_info->dma = dma_map_page(dev,
4018 frag->page,
4019 frag->page_offset,
4020 len,
4021 DMA_TO_DEVICE);
4022 if (dma_mapping_error(dev, buffer_info->dma))
4023 goto dma_error;
4027 tx_ring->buffer_info[i].skb = skb;
4028 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4029 /* multiply data chunks by size of headers */
4030 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
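/* Worked example (illustrative, not in the original source): a TSO skb
 * whose linear area holds only the 66-byte headers, with skb->len = 4410
 * and gso_segs = 3, is accounted as (3 - 1) * 66 + 4410 = 4542 bytes, i.e.
 * the original frame plus one replicated header per additional segment. */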
4031 tx_ring->buffer_info[i].gso_segs = gso_segs;
4032 tx_ring->buffer_info[first].next_to_watch = i;
4034 return ++count;
4036 dma_error:
4037 dev_err(dev, "TX DMA map failed\n");
4039 /* clear timestamp and dma mappings for failed buffer_info mapping */
4040 buffer_info->dma = 0;
4041 buffer_info->time_stamp = 0;
4042 buffer_info->length = 0;
4043 buffer_info->next_to_watch = 0;
4044 buffer_info->mapped_as_page = false;
4046 /* clear timestamp and dma mappings for remaining portion of packet */
4047 while (count--) {
4048 if (i == 0)
4049 i = tx_ring->count;
4050 i--;
4051 buffer_info = &tx_ring->buffer_info[i];
4052 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4055 return 0;
4058 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4059 u32 tx_flags, int count, u32 paylen,
4060 u8 hdr_len)
4062 union e1000_adv_tx_desc *tx_desc;
4063 struct igb_buffer *buffer_info;
4064 u32 olinfo_status = 0, cmd_type_len;
4065 unsigned int i = tx_ring->next_to_use;
4067 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4068 E1000_ADVTXD_DCMD_DEXT);
4070 if (tx_flags & IGB_TX_FLAGS_VLAN)
4071 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4073 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4074 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4076 if (tx_flags & IGB_TX_FLAGS_TSO) {
4077 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4079 /* insert tcp checksum */
4080 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4082 /* insert ip checksum */
4083 if (tx_flags & IGB_TX_FLAGS_IPV4)
4084 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4086 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4087 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4090 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4091 (tx_flags & (IGB_TX_FLAGS_CSUM |
4092 IGB_TX_FLAGS_TSO |
4093 IGB_TX_FLAGS_VLAN)))
4094 olinfo_status |= tx_ring->reg_idx << 4;
4096 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
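/* Illustrative note (not in the original source): for a TSO skb with
 * skb->len = 4410 and hdr_len = 66, PAYLEN is reported as 4410 - 66 = 4344,
 * the TCP payload only; the replicated per-segment headers are generated by
 * the hardware from the context descriptor. */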
4098 do {
4099 buffer_info = &tx_ring->buffer_info[i];
4100 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4101 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4102 tx_desc->read.cmd_type_len =
4103 cpu_to_le32(cmd_type_len | buffer_info->length);
4104 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4105 count--;
4106 i++;
4107 if (i == tx_ring->count)
4108 i = 0;
4109 } while (count > 0);
4111 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4112 /* Force memory writes to complete before letting h/w
4113 * know there are new descriptors to fetch. (Only
4114 * applicable for weak-ordered memory model archs,
4115 * such as IA-64). */
4116 wmb();
4118 tx_ring->next_to_use = i;
4119 writel(i, tx_ring->tail);
4120 /* we need this if more than one processor can write to our tail
4121 * at a time, it synchronizes IO on IA64/Altix systems */
4122 mmiowb();
4125 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4127 struct net_device *netdev = tx_ring->netdev;
4129 netif_stop_subqueue(netdev, tx_ring->queue_index);
4131 /* Herbert's original patch had:
4132 * smp_mb__after_netif_stop_queue();
4133 * but since that doesn't exist yet, just open code it. */
4134 smp_mb();
4136 /* We need to check again in a case another CPU has just
4137 * made room available. */
4138 if (igb_desc_unused(tx_ring) < size)
4139 return -EBUSY;
4141 /* A reprieve! */
4142 netif_wake_subqueue(netdev, tx_ring->queue_index);
4144 u64_stats_update_begin(&tx_ring->tx_syncp2);
4145 tx_ring->tx_stats.restart_queue2++;
4146 u64_stats_update_end(&tx_ring->tx_syncp2);
4148 return 0;
4151 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4153 if (igb_desc_unused(tx_ring) >= size)
4154 return 0;
4155 return __igb_maybe_stop_tx(tx_ring, size);
4158 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4159 struct igb_ring *tx_ring)
4161 int tso = 0, count;
4162 u32 tx_flags = 0;
4163 u16 first;
4164 u8 hdr_len = 0;
4166 /* need: 1 descriptor per page,
4167 * + 2 desc gap to keep tail from touching head,
4168 * + 1 desc for skb->data,
4169 * + 1 desc for context descriptor,
4170 * otherwise try next time */
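/* e.g. (illustration only) an skb carrying 3 page fragments asks for
 * 3 + 4 = 7 free descriptors before the transmit is attempted. */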
4171 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4172 /* this is a hard error */
4173 return NETDEV_TX_BUSY;
4176 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4177 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4178 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4181 if (vlan_tx_tag_present(skb)) {
4182 tx_flags |= IGB_TX_FLAGS_VLAN;
4183 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4186 if (skb->protocol == htons(ETH_P_IP))
4187 tx_flags |= IGB_TX_FLAGS_IPV4;
4189 first = tx_ring->next_to_use;
4190 if (skb_is_gso(skb)) {
4191 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4193 if (tso < 0) {
4194 dev_kfree_skb_any(skb);
4195 return NETDEV_TX_OK;
4199 if (tso)
4200 tx_flags |= IGB_TX_FLAGS_TSO;
4201 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4202 (skb->ip_summed == CHECKSUM_PARTIAL))
4203 tx_flags |= IGB_TX_FLAGS_CSUM;
4205 /*
4206 * count reflects descriptors mapped; if 0 or less then a mapping error
4207 * has occurred and we need to rewind the descriptor queue
4208 */
4209 count = igb_tx_map_adv(tx_ring, skb, first);
4210 if (!count) {
4211 dev_kfree_skb_any(skb);
4212 tx_ring->buffer_info[first].time_stamp = 0;
4213 tx_ring->next_to_use = first;
4214 return NETDEV_TX_OK;
4217 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4219 /* Make sure there is space in the ring for the next send. */
4220 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4222 return NETDEV_TX_OK;
4225 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4226 struct net_device *netdev)
4228 struct igb_adapter *adapter = netdev_priv(netdev);
4229 struct igb_ring *tx_ring;
4230 int r_idx = 0;
4232 if (test_bit(__IGB_DOWN, &adapter->state)) {
4233 dev_kfree_skb_any(skb);
4234 return NETDEV_TX_OK;
4237 if (skb->len <= 0) {
4238 dev_kfree_skb_any(skb);
4239 return NETDEV_TX_OK;
4242 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4243 tx_ring = adapter->multi_tx_table[r_idx];
4245 /* This goes back to the question of how to logically map a tx queue
4246 * to a flow. Right now, performance suffers slightly when using
4247 * multiple tx queues. If the stack breaks away from a
4248 * single qdisc implementation, we can look at this again. */
4249 return igb_xmit_frame_ring_adv(skb, tx_ring);
4253 * igb_tx_timeout - Respond to a Tx Hang
4254 * @netdev: network interface device structure
4256 static void igb_tx_timeout(struct net_device *netdev)
4258 struct igb_adapter *adapter = netdev_priv(netdev);
4259 struct e1000_hw *hw = &adapter->hw;
4261 /* Do the reset outside of interrupt context */
4262 adapter->tx_timeout_count++;
4264 if (hw->mac.type == e1000_82580)
4265 hw->dev_spec._82575.global_device_reset = true;
4267 schedule_work(&adapter->reset_task);
4268 wr32(E1000_EICS,
4269 (adapter->eims_enable_mask & ~adapter->eims_other));
4272 static void igb_reset_task(struct work_struct *work)
4274 struct igb_adapter *adapter;
4275 adapter = container_of(work, struct igb_adapter, reset_task);
4277 igb_dump(adapter);
4278 netdev_err(adapter->netdev, "Reset adapter\n");
4279 igb_reinit_locked(adapter);
4283 * igb_get_stats64 - Get System Network Statistics
4284 * @netdev: network interface device structure
4285 * @stats: rtnl_link_stats64 pointer
4288 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4289 struct rtnl_link_stats64 *stats)
4291 struct igb_adapter *adapter = netdev_priv(netdev);
4293 spin_lock(&adapter->stats64_lock);
4294 igb_update_stats(adapter, &adapter->stats64);
4295 memcpy(stats, &adapter->stats64, sizeof(*stats));
4296 spin_unlock(&adapter->stats64_lock);
4298 return stats;
4302 * igb_change_mtu - Change the Maximum Transfer Unit
4303 * @netdev: network interface device structure
4304 * @new_mtu: new value for maximum frame size
4306 * Returns 0 on success, negative on failure
4308 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4310 struct igb_adapter *adapter = netdev_priv(netdev);
4311 struct pci_dev *pdev = adapter->pdev;
4312 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4313 u32 rx_buffer_len, i;
4315 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4316 dev_err(&pdev->dev, "Invalid MTU setting\n");
4317 return -EINVAL;
4320 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4321 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4322 return -EINVAL;
4325 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4326 msleep(1);
4328 /* igb_down has a dependency on max_frame_size */
4329 adapter->max_frame_size = max_frame;
4331 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4332 * means we reserve 2 more; this pushes us to allocate from the next
4333 * larger slab size.
4334 * i.e. RXBUFFER_2048 --> size-4096 slab
4335 */
4337 if (adapter->hw.mac.type == e1000_82580)
4338 max_frame += IGB_TS_HDR_LEN;
4340 if (max_frame <= IGB_RXBUFFER_1024)
4341 rx_buffer_len = IGB_RXBUFFER_1024;
4342 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4343 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4344 else
4345 rx_buffer_len = IGB_RXBUFFER_128;
4347 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4348 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4349 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4351 if ((adapter->hw.mac.type == e1000_82580) &&
4352 (rx_buffer_len == IGB_RXBUFFER_128))
4353 rx_buffer_len += IGB_RXBUFFER_64;
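/* Worked example (illustrative, not in the original source): the default
 * MTU of 1500 gives max_frame = 1500 + 14 + 4 = 1518, which is above
 * IGB_RXBUFFER_1024 but within MAXIMUM_ETHERNET_VLAN_SIZE (1522), so
 * rx_buffer_len becomes 1522; larger jumbo settings fall back to the small
 * IGB_RXBUFFER_128 header buffer with the rest landing in half-page
 * fragments. */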
4355 if (netif_running(netdev))
4356 igb_down(adapter);
4358 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4359 netdev->mtu, new_mtu);
4360 netdev->mtu = new_mtu;
4362 for (i = 0; i < adapter->num_rx_queues; i++)
4363 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4365 if (netif_running(netdev))
4366 igb_up(adapter);
4367 else
4368 igb_reset(adapter);
4370 clear_bit(__IGB_RESETTING, &adapter->state);
4372 return 0;
4376 * igb_update_stats - Update the board statistics counters
4377 * @adapter: board private structure
4380 void igb_update_stats(struct igb_adapter *adapter,
4381 struct rtnl_link_stats64 *net_stats)
4383 struct e1000_hw *hw = &adapter->hw;
4384 struct pci_dev *pdev = adapter->pdev;
4385 u32 reg, mpc;
4386 u16 phy_tmp;
4387 int i;
4388 u64 bytes, packets;
4389 unsigned int start;
4390 u64 _bytes, _packets;
4392 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4395 * Prevent stats update while adapter is being reset, or if the pci
4396 * connection is down.
4398 if (adapter->link_speed == 0)
4399 return;
4400 if (pci_channel_offline(pdev))
4401 return;
4403 bytes = 0;
4404 packets = 0;
4405 for (i = 0; i < adapter->num_rx_queues; i++) {
4406 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4407 struct igb_ring *ring = adapter->rx_ring[i];
4409 ring->rx_stats.drops += rqdpc_tmp;
4410 net_stats->rx_fifo_errors += rqdpc_tmp;
4412 do {
4413 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4414 _bytes = ring->rx_stats.bytes;
4415 _packets = ring->rx_stats.packets;
4416 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4417 bytes += _bytes;
4418 packets += _packets;
4421 net_stats->rx_bytes = bytes;
4422 net_stats->rx_packets = packets;
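/* Note added for clarity (not in the original source): the
 * u64_stats_fetch_begin_bh()/u64_stats_fetch_retry_bh() pairs above and
 * below simply re-read a ring's counters if a writer updated them
 * mid-read, keeping the 64-bit totals consistent on 32-bit hosts without
 * taking a lock. */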
4424 bytes = 0;
4425 packets = 0;
4426 for (i = 0; i < adapter->num_tx_queues; i++) {
4427 struct igb_ring *ring = adapter->tx_ring[i];
4428 do {
4429 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4430 _bytes = ring->tx_stats.bytes;
4431 _packets = ring->tx_stats.packets;
4432 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4433 bytes += _bytes;
4434 packets += _packets;
4436 net_stats->tx_bytes = bytes;
4437 net_stats->tx_packets = packets;
4439 /* read stats registers */
4440 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4441 adapter->stats.gprc += rd32(E1000_GPRC);
4442 adapter->stats.gorc += rd32(E1000_GORCL);
4443 rd32(E1000_GORCH); /* clear GORCL */
4444 adapter->stats.bprc += rd32(E1000_BPRC);
4445 adapter->stats.mprc += rd32(E1000_MPRC);
4446 adapter->stats.roc += rd32(E1000_ROC);
4448 adapter->stats.prc64 += rd32(E1000_PRC64);
4449 adapter->stats.prc127 += rd32(E1000_PRC127);
4450 adapter->stats.prc255 += rd32(E1000_PRC255);
4451 adapter->stats.prc511 += rd32(E1000_PRC511);
4452 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4453 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4454 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4455 adapter->stats.sec += rd32(E1000_SEC);
4457 mpc = rd32(E1000_MPC);
4458 adapter->stats.mpc += mpc;
4459 net_stats->rx_fifo_errors += mpc;
4460 adapter->stats.scc += rd32(E1000_SCC);
4461 adapter->stats.ecol += rd32(E1000_ECOL);
4462 adapter->stats.mcc += rd32(E1000_MCC);
4463 adapter->stats.latecol += rd32(E1000_LATECOL);
4464 adapter->stats.dc += rd32(E1000_DC);
4465 adapter->stats.rlec += rd32(E1000_RLEC);
4466 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4467 adapter->stats.xontxc += rd32(E1000_XONTXC);
4468 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4469 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4470 adapter->stats.fcruc += rd32(E1000_FCRUC);
4471 adapter->stats.gptc += rd32(E1000_GPTC);
4472 adapter->stats.gotc += rd32(E1000_GOTCL);
4473 rd32(E1000_GOTCH); /* clear GOTCL */
4474 adapter->stats.rnbc += rd32(E1000_RNBC);
4475 adapter->stats.ruc += rd32(E1000_RUC);
4476 adapter->stats.rfc += rd32(E1000_RFC);
4477 adapter->stats.rjc += rd32(E1000_RJC);
4478 adapter->stats.tor += rd32(E1000_TORH);
4479 adapter->stats.tot += rd32(E1000_TOTH);
4480 adapter->stats.tpr += rd32(E1000_TPR);
4482 adapter->stats.ptc64 += rd32(E1000_PTC64);
4483 adapter->stats.ptc127 += rd32(E1000_PTC127);
4484 adapter->stats.ptc255 += rd32(E1000_PTC255);
4485 adapter->stats.ptc511 += rd32(E1000_PTC511);
4486 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4487 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4489 adapter->stats.mptc += rd32(E1000_MPTC);
4490 adapter->stats.bptc += rd32(E1000_BPTC);
4492 adapter->stats.tpt += rd32(E1000_TPT);
4493 adapter->stats.colc += rd32(E1000_COLC);
4495 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4496 /* read internal phy specific stats */
4497 reg = rd32(E1000_CTRL_EXT);
4498 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4499 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4500 adapter->stats.tncrs += rd32(E1000_TNCRS);
4503 adapter->stats.tsctc += rd32(E1000_TSCTC);
4504 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4506 adapter->stats.iac += rd32(E1000_IAC);
4507 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4508 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4509 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4510 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4511 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4512 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4513 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4514 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4516 /* Fill out the OS statistics structure */
4517 net_stats->multicast = adapter->stats.mprc;
4518 net_stats->collisions = adapter->stats.colc;
4520 /* Rx Errors */
4522 /* RLEC on some newer hardware can be incorrect so build
4523 * our own version based on RUC and ROC */
4524 net_stats->rx_errors = adapter->stats.rxerrc +
4525 adapter->stats.crcerrs + adapter->stats.algnerrc +
4526 adapter->stats.ruc + adapter->stats.roc +
4527 adapter->stats.cexterr;
4528 net_stats->rx_length_errors = adapter->stats.ruc +
4529 adapter->stats.roc;
4530 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4531 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4532 net_stats->rx_missed_errors = adapter->stats.mpc;
4534 /* Tx Errors */
4535 net_stats->tx_errors = adapter->stats.ecol +
4536 adapter->stats.latecol;
4537 net_stats->tx_aborted_errors = adapter->stats.ecol;
4538 net_stats->tx_window_errors = adapter->stats.latecol;
4539 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4541 /* Tx Dropped needs to be maintained elsewhere */
4543 /* Phy Stats */
4544 if (hw->phy.media_type == e1000_media_type_copper) {
4545 if ((adapter->link_speed == SPEED_1000) &&
4546 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4547 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4548 adapter->phy_stats.idle_errors += phy_tmp;
4552 /* Management Stats */
4553 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4554 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4555 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4558 static irqreturn_t igb_msix_other(int irq, void *data)
4560 struct igb_adapter *adapter = data;
4561 struct e1000_hw *hw = &adapter->hw;
4562 u32 icr = rd32(E1000_ICR);
4563 /* reading ICR causes bit 31 of EICR to be cleared */
4565 if (icr & E1000_ICR_DRSTA)
4566 schedule_work(&adapter->reset_task);
4568 if (icr & E1000_ICR_DOUTSYNC) {
4569 /* HW is reporting DMA is out of sync */
4570 adapter->stats.doosync++;
4571 /* The DMA Out of Sync is also an indication of a spoof event
4572 * in IOV mode. Check the Wrong VM Behavior register to
4573 * see if it is really a spoof event. */
4574 igb_check_wvbr(adapter);
4577 /* Check for a mailbox event */
4578 if (icr & E1000_ICR_VMMB)
4579 igb_msg_task(adapter);
4581 if (icr & E1000_ICR_LSC) {
4582 hw->mac.get_link_status = 1;
4583 /* guard against interrupt when we're going down */
4584 if (!test_bit(__IGB_DOWN, &adapter->state))
4585 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4588 if (adapter->vfs_allocated_count)
4589 wr32(E1000_IMS, E1000_IMS_LSC |
4590 E1000_IMS_VMMB |
4591 E1000_IMS_DOUTSYNC);
4592 else
4593 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4594 wr32(E1000_EIMS, adapter->eims_other);
4596 return IRQ_HANDLED;
4599 static void igb_write_itr(struct igb_q_vector *q_vector)
4601 struct igb_adapter *adapter = q_vector->adapter;
4602 u32 itr_val = q_vector->itr_val & 0x7FFC;
4604 if (!q_vector->set_itr)
4605 return;
4607 if (!itr_val)
4608 itr_val = 0x4;
4610 if (adapter->hw.mac.type == e1000_82575)
4611 itr_val |= itr_val << 16;
4612 else
4613 itr_val |= 0x8000000;
4615 writel(itr_val, q_vector->itr_register);
4616 q_vector->set_itr = 0;
4619 static irqreturn_t igb_msix_ring(int irq, void *data)
4621 struct igb_q_vector *q_vector = data;
4623 /* Write the ITR value calculated from the previous interrupt. */
4624 igb_write_itr(q_vector);
4626 napi_schedule(&q_vector->napi);
4628 return IRQ_HANDLED;
4631 #ifdef CONFIG_IGB_DCA
4632 static void igb_update_dca(struct igb_q_vector *q_vector)
4634 struct igb_adapter *adapter = q_vector->adapter;
4635 struct e1000_hw *hw = &adapter->hw;
4636 int cpu = get_cpu();
4638 if (q_vector->cpu == cpu)
4639 goto out_no_update;
4641 if (q_vector->tx_ring) {
4642 int q = q_vector->tx_ring->reg_idx;
4643 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4644 if (hw->mac.type == e1000_82575) {
4645 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4646 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4647 } else {
4648 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4649 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4650 E1000_DCA_TXCTRL_CPUID_SHIFT;
4652 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4653 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4655 if (q_vector->rx_ring) {
4656 int q = q_vector->rx_ring->reg_idx;
4657 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4658 if (hw->mac.type == e1000_82575) {
4659 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4660 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4661 } else {
4662 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4663 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4664 E1000_DCA_RXCTRL_CPUID_SHIFT;
4666 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4667 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4668 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4669 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4671 q_vector->cpu = cpu;
4672 out_no_update:
4673 put_cpu();
4676 static void igb_setup_dca(struct igb_adapter *adapter)
4678 struct e1000_hw *hw = &adapter->hw;
4679 int i;
4681 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4682 return;
4684 /* Always use CB2 mode, difference is masked in the CB driver. */
4685 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4687 for (i = 0; i < adapter->num_q_vectors; i++) {
4688 adapter->q_vector[i]->cpu = -1;
4689 igb_update_dca(adapter->q_vector[i]);
4693 static int __igb_notify_dca(struct device *dev, void *data)
4695 struct net_device *netdev = dev_get_drvdata(dev);
4696 struct igb_adapter *adapter = netdev_priv(netdev);
4697 struct pci_dev *pdev = adapter->pdev;
4698 struct e1000_hw *hw = &adapter->hw;
4699 unsigned long event = *(unsigned long *)data;
4701 switch (event) {
4702 case DCA_PROVIDER_ADD:
4703 /* if already enabled, don't do it again */
4704 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4705 break;
4706 if (dca_add_requester(dev) == 0) {
4707 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4708 dev_info(&pdev->dev, "DCA enabled\n");
4709 igb_setup_dca(adapter);
4710 break;
4712 /* Fall Through since DCA is disabled. */
4713 case DCA_PROVIDER_REMOVE:
4714 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4715 /* without this a class_device is left
4716 * hanging around in the sysfs model */
4717 dca_remove_requester(dev);
4718 dev_info(&pdev->dev, "DCA disabled\n");
4719 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4720 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4722 break;
4725 return 0;
4728 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4729 void *p)
4731 int ret_val;
4733 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4734 __igb_notify_dca);
4736 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4738 #endif /* CONFIG_IGB_DCA */
4740 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4742 struct e1000_hw *hw = &adapter->hw;
4743 u32 ping;
4744 int i;
4746 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4747 ping = E1000_PF_CONTROL_MSG;
4748 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4749 ping |= E1000_VT_MSGTYPE_CTS;
4750 igb_write_mbx(hw, &ping, 1, i);
4754 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4756 struct e1000_hw *hw = &adapter->hw;
4757 u32 vmolr = rd32(E1000_VMOLR(vf));
4758 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4760 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4761 IGB_VF_FLAG_MULTI_PROMISC);
4762 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4764 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4765 vmolr |= E1000_VMOLR_MPME;
4766 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4767 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4768 } else {
4770 * if we have hashes and we are clearing a multicast promisc
4771 * flag we need to write the hashes to the MTA as this step
4772 * was previously skipped
4774 if (vf_data->num_vf_mc_hashes > 30) {
4775 vmolr |= E1000_VMOLR_MPME;
4776 } else if (vf_data->num_vf_mc_hashes) {
4777 int j;
4778 vmolr |= E1000_VMOLR_ROMPE;
4779 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4780 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4784 wr32(E1000_VMOLR(vf), vmolr);
4786 /* there are flags left unprocessed, likely not supported */
4787 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4788 return -EINVAL;
4790 return 0;
4794 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4795 u32 *msgbuf, u32 vf)
4797 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4798 u16 *hash_list = (u16 *)&msgbuf[1];
4799 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4800 int i;
4802 /* salt away the number of multicast addresses assigned
4803 * to this VF for later use to restore when the PF multicast
4804 * list changes
4805 */
4806 vf_data->num_vf_mc_hashes = n;
4808 /* only up to 30 hash values supported */
4809 if (n > 30)
4810 n = 30;
4812 /* store the hashes for later use */
4813 for (i = 0; i < n; i++)
4814 vf_data->vf_mc_hashes[i] = hash_list[i];
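/* Note for clarity (not in the original source): only the first 30 hash
 * values are stored per VF; a VF that registered more than 30 multicast
 * addresses is instead put into multicast-promiscuous mode when its
 * filters are rebuilt (see igb_restore_vf_multicasts() below). */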
4816 /* Flush and reset the mta with the new values */
4817 igb_set_rx_mode(adapter->netdev);
4819 return 0;
4822 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4824 struct e1000_hw *hw = &adapter->hw;
4825 struct vf_data_storage *vf_data;
4826 int i, j;
4828 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4829 u32 vmolr = rd32(E1000_VMOLR(i));
4830 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4832 vf_data = &adapter->vf_data[i];
4834 if ((vf_data->num_vf_mc_hashes > 30) ||
4835 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4836 vmolr |= E1000_VMOLR_MPME;
4837 } else if (vf_data->num_vf_mc_hashes) {
4838 vmolr |= E1000_VMOLR_ROMPE;
4839 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4840 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4842 wr32(E1000_VMOLR(i), vmolr);
4846 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4848 struct e1000_hw *hw = &adapter->hw;
4849 u32 pool_mask, reg, vid;
4850 int i;
4852 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4854 /* Find the vlan filter for this id */
4855 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4856 reg = rd32(E1000_VLVF(i));
4858 /* remove the vf from the pool */
4859 reg &= ~pool_mask;
4861 /* if pool is empty then remove entry from vfta */
4862 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4863 (reg & E1000_VLVF_VLANID_ENABLE)) {
4864 vid = reg & E1000_VLVF_VLANID_MASK;
4865 igb_vfta_set(hw, vid, false);
4866 reg = 0;
4869 wr32(E1000_VLVF(i), reg);
4872 adapter->vf_data[vf].vlans_enabled = 0;
4875 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4877 struct e1000_hw *hw = &adapter->hw;
4878 u32 reg, i;
4880 /* The vlvf table only exists on 82576 hardware and newer */
4881 if (hw->mac.type < e1000_82576)
4882 return -1;
4884 /* we only need to do this if VMDq is enabled */
4885 if (!adapter->vfs_allocated_count)
4886 return -1;
4888 /* Find the vlan filter for this id */
4889 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4890 reg = rd32(E1000_VLVF(i));
4891 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4892 vid == (reg & E1000_VLVF_VLANID_MASK))
4893 break;
4896 if (add) {
4897 if (i == E1000_VLVF_ARRAY_SIZE) {
4898 /* Did not find a matching VLAN ID entry that was
4899 * enabled. Search for a free filter entry, i.e.
4900 * one without the enable bit set
4902 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4903 reg = rd32(E1000_VLVF(i));
4904 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4905 break;
4908 if (i < E1000_VLVF_ARRAY_SIZE) {
4909 /* Found an enabled/available entry */
4910 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4912 /* if !enabled we need to set this up in vfta */
4913 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4914 /* add VID to filter table */
4915 igb_vfta_set(hw, vid, true);
4916 reg |= E1000_VLVF_VLANID_ENABLE;
4918 reg &= ~E1000_VLVF_VLANID_MASK;
4919 reg |= vid;
4920 wr32(E1000_VLVF(i), reg);
4922 /* do not modify RLPML for PF devices */
4923 if (vf >= adapter->vfs_allocated_count)
4924 return 0;
4926 if (!adapter->vf_data[vf].vlans_enabled) {
4927 u32 size;
4928 reg = rd32(E1000_VMOLR(vf));
4929 size = reg & E1000_VMOLR_RLPML_MASK;
4930 size += 4;
4931 reg &= ~E1000_VMOLR_RLPML_MASK;
4932 reg |= size;
4933 wr32(E1000_VMOLR(vf), reg);
4936 adapter->vf_data[vf].vlans_enabled++;
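/* Note for clarity (not in the original source): enabling the first VLAN
 * on a VF grows its VMOLR.RLPML limit by 4 bytes so a full-sized frame
 * still fits once the 802.1Q tag is inserted; the removal path below
 * shrinks it again when the last VLAN goes away. */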
4937 return 0;
4939 } else {
4940 if (i < E1000_VLVF_ARRAY_SIZE) {
4941 /* remove vf from the pool */
4942 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4943 /* if pool is empty then remove entry from vfta */
4944 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4945 reg = 0;
4946 igb_vfta_set(hw, vid, false);
4948 wr32(E1000_VLVF(i), reg);
4950 /* do not modify RLPML for PF devices */
4951 if (vf >= adapter->vfs_allocated_count)
4952 return 0;
4954 adapter->vf_data[vf].vlans_enabled--;
4955 if (!adapter->vf_data[vf].vlans_enabled) {
4956 u32 size;
4957 reg = rd32(E1000_VMOLR(vf));
4958 size = reg & E1000_VMOLR_RLPML_MASK;
4959 size -= 4;
4960 reg &= ~E1000_VMOLR_RLPML_MASK;
4961 reg |= size;
4962 wr32(E1000_VMOLR(vf), reg);
4966 return 0;
4969 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4971 struct e1000_hw *hw = &adapter->hw;
4973 if (vid)
4974 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4975 else
4976 wr32(E1000_VMVIR(vf), 0);
4979 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4980 int vf, u16 vlan, u8 qos)
4982 int err = 0;
4983 struct igb_adapter *adapter = netdev_priv(netdev);
4985 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4986 return -EINVAL;
4987 if (vlan || qos) {
4988 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4989 if (err)
4990 goto out;
4991 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4992 igb_set_vmolr(adapter, vf, !vlan);
4993 adapter->vf_data[vf].pf_vlan = vlan;
4994 adapter->vf_data[vf].pf_qos = qos;
4995 dev_info(&adapter->pdev->dev,
4996 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4997 if (test_bit(__IGB_DOWN, &adapter->state)) {
4998 dev_warn(&adapter->pdev->dev,
4999 "The VF VLAN has been set,"
5000 " but the PF device is not up.\n");
5001 dev_warn(&adapter->pdev->dev,
5002 "Bring the PF device up before"
5003 " attempting to use the VF device.\n");
5005 } else {
5006 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5007 false, vf);
5008 igb_set_vmvir(adapter, vlan, vf);
5009 igb_set_vmolr(adapter, vf, true);
5010 adapter->vf_data[vf].pf_vlan = 0;
5011 adapter->vf_data[vf].pf_qos = 0;
5013 out:
5014 return err;
5017 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5019 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5020 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5022 return igb_vlvf_set(adapter, vid, add, vf);
5025 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5027 /* clear flags - except flag that indicates PF has set the MAC */
5028 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5029 adapter->vf_data[vf].last_nack = jiffies;
5031 /* reset offloads to defaults */
5032 igb_set_vmolr(adapter, vf, true);
5034 /* reset vlans for device */
5035 igb_clear_vf_vfta(adapter, vf);
5036 if (adapter->vf_data[vf].pf_vlan)
5037 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5038 adapter->vf_data[vf].pf_vlan,
5039 adapter->vf_data[vf].pf_qos);
5040 else
5041 igb_clear_vf_vfta(adapter, vf);
5043 /* reset multicast table array for vf */
5044 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5046 /* Flush and reset the mta with the new values */
5047 igb_set_rx_mode(adapter->netdev);
5050 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5052 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5054 /* generate a new mac address as we were hotplug removed/added */
5055 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5056 random_ether_addr(vf_mac);
5058 /* process remaining reset events */
5059 igb_vf_reset(adapter, vf);
5062 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5064 struct e1000_hw *hw = &adapter->hw;
5065 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5066 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
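/* Illustrative note (not in the original source): VF MAC filters are
 * handed out from the top of the receive-address register array, so with,
 * say, 24 RAR entries VF 0 uses entry 23, VF 1 entry 22, and so on, while
 * entry 0 stays reserved for the PF's own address. */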
5067 u32 reg, msgbuf[3];
5068 u8 *addr = (u8 *)(&msgbuf[1]);
5070 /* process all the same items cleared in a function level reset */
5071 igb_vf_reset(adapter, vf);
5073 /* set vf mac address */
5074 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5076 /* enable transmit and receive for vf */
5077 reg = rd32(E1000_VFTE);
5078 wr32(E1000_VFTE, reg | (1 << vf));
5079 reg = rd32(E1000_VFRE);
5080 wr32(E1000_VFRE, reg | (1 << vf));
5082 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5084 /* reply to reset with ack and vf mac address */
5085 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5086 memcpy(addr, vf_mac, 6);
5087 igb_write_mbx(hw, msgbuf, 3, vf);
5090 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5093 * The VF MAC Address is stored in a packed array of bytes
5094 * starting at the second 32 bit word of the msg array
5096 unsigned char *addr = (char *)&msg[1];
5097 int err = -1;
5099 if (is_valid_ether_addr(addr))
5100 err = igb_set_vf_mac(adapter, vf, addr);
5102 return err;
5105 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5107 struct e1000_hw *hw = &adapter->hw;
5108 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5109 u32 msg = E1000_VT_MSGTYPE_NACK;
5111 /* if device isn't clear to send it shouldn't be reading either */
5112 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5113 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5114 igb_write_mbx(hw, &msg, 1, vf);
5115 vf_data->last_nack = jiffies;
5119 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5121 struct pci_dev *pdev = adapter->pdev;
5122 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5123 struct e1000_hw *hw = &adapter->hw;
5124 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5125 s32 retval;
5127 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5129 if (retval) {
5130 /* if receive failed revoke VF CTS stats and restart init */
5131 dev_err(&pdev->dev, "Error receiving message from VF\n");
5132 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5133 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5134 return;
5135 goto out;
5138 /* this is a message we already processed, do nothing */
5139 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5140 return;
5143 * until the vf completes a reset it should not be
5144 * allowed to start any configuration.
5147 if (msgbuf[0] == E1000_VF_RESET) {
5148 igb_vf_reset_msg(adapter, vf);
5149 return;
5152 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5153 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5154 return;
5155 retval = -1;
5156 goto out;
5159 switch ((msgbuf[0] & 0xFFFF)) {
5160 case E1000_VF_SET_MAC_ADDR:
5161 retval = -EINVAL;
5162 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5163 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5164 else
5165 dev_warn(&pdev->dev,
5166 "VF %d attempted to override administratively "
5167 "set MAC address\nReload the VF driver to "
5168 "resume operations\n", vf);
5169 break;
5170 case E1000_VF_SET_PROMISC:
5171 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5172 break;
5173 case E1000_VF_SET_MULTICAST:
5174 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5175 break;
5176 case E1000_VF_SET_LPE:
5177 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5178 break;
5179 case E1000_VF_SET_VLAN:
5180 retval = -1;
5181 if (vf_data->pf_vlan)
5182 dev_warn(&pdev->dev,
5183 "VF %d attempted to override administratively "
5184 "set VLAN tag\nReload the VF driver to "
5185 "resume operations\n", vf);
5186 else
5187 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5188 break;
5189 default:
5190 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5191 retval = -1;
5192 break;
5195 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5196 out:
5197 /* notify the VF of the results of what it sent us */
5198 if (retval)
5199 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5200 else
5201 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5203 igb_write_mbx(hw, msgbuf, 1, vf);
5206 static void igb_msg_task(struct igb_adapter *adapter)
5208 struct e1000_hw *hw = &adapter->hw;
5209 u32 vf;
5211 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5212 /* process any reset requests */
5213 if (!igb_check_for_rst(hw, vf))
5214 igb_vf_reset_event(adapter, vf);
5216 /* process any messages pending */
5217 if (!igb_check_for_msg(hw, vf))
5218 igb_rcv_msg_from_vf(adapter, vf);
5220 /* process any acks */
5221 if (!igb_check_for_ack(hw, vf))
5222 igb_rcv_ack_from_vf(adapter, vf);
5227 * igb_set_uta - Set unicast filter table address
5228 * @adapter: board private structure
5230 * The unicast table address is a register array of 32-bit registers.
5231 * The table is meant to be used in a way similar to how the MTA is used
5232 * however due to certain limitations in the hardware it is necessary to
5233 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5234 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5236 static void igb_set_uta(struct igb_adapter *adapter)
5238 struct e1000_hw *hw = &adapter->hw;
5239 int i;
5241 /* The UTA table only exists on 82576 hardware and newer */
5242 if (hw->mac.type < e1000_82576)
5243 return;
5245 /* we only need to do this if VMDq is enabled */
5246 if (!adapter->vfs_allocated_count)
5247 return;
5249 for (i = 0; i < hw->mac.uta_reg_count; i++)
5250 array_wr32(E1000_UTA, i, ~0);
5254 * igb_intr_msi - Interrupt Handler
5255 * @irq: interrupt number
5256 * @data: pointer to a network interface device structure
5258 static irqreturn_t igb_intr_msi(int irq, void *data)
5260 struct igb_adapter *adapter = data;
5261 struct igb_q_vector *q_vector = adapter->q_vector[0];
5262 struct e1000_hw *hw = &adapter->hw;
5263 /* read ICR disables interrupts using IAM */
5264 u32 icr = rd32(E1000_ICR);
5266 igb_write_itr(q_vector);
5268 if (icr & E1000_ICR_DRSTA)
5269 schedule_work(&adapter->reset_task);
5271 if (icr & E1000_ICR_DOUTSYNC) {
5272 /* HW is reporting DMA is out of sync */
5273 adapter->stats.doosync++;
5276 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5277 hw->mac.get_link_status = 1;
5278 if (!test_bit(__IGB_DOWN, &adapter->state))
5279 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5282 napi_schedule(&q_vector->napi);
5284 return IRQ_HANDLED;
5288 * igb_intr - Legacy Interrupt Handler
5289 * @irq: interrupt number
5290 * @data: pointer to a network interface device structure
5292 static irqreturn_t igb_intr(int irq, void *data)
5294 struct igb_adapter *adapter = data;
5295 struct igb_q_vector *q_vector = adapter->q_vector[0];
5296 struct e1000_hw *hw = &adapter->hw;
5297 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5298 * need for the IMC write */
5299 u32 icr = rd32(E1000_ICR);
5300 if (!icr)
5301 return IRQ_NONE; /* Not our interrupt */
5303 igb_write_itr(q_vector);
5305 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5306 * not set, then the adapter didn't send an interrupt */
5307 if (!(icr & E1000_ICR_INT_ASSERTED))
5308 return IRQ_NONE;
5310 if (icr & E1000_ICR_DRSTA)
5311 schedule_work(&adapter->reset_task);
5313 if (icr & E1000_ICR_DOUTSYNC) {
5314 /* HW is reporting DMA is out of sync */
5315 adapter->stats.doosync++;
5318 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5319 hw->mac.get_link_status = 1;
5320 /* guard against interrupt when we're going down */
5321 if (!test_bit(__IGB_DOWN, &adapter->state))
5322 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5325 napi_schedule(&q_vector->napi);
5327 return IRQ_HANDLED;
5330 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5332 struct igb_adapter *adapter = q_vector->adapter;
5333 struct e1000_hw *hw = &adapter->hw;
5335 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5336 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5337 if (!adapter->msix_entries)
5338 igb_set_itr(adapter);
5339 else
5340 igb_update_ring_itr(q_vector);
5343 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5344 if (adapter->msix_entries)
5345 wr32(E1000_EIMS, q_vector->eims_value);
5346 else
5347 igb_irq_enable(adapter);
5352 * igb_poll - NAPI Rx polling callback
5353 * @napi: napi polling structure
5354 * @budget: count of how many packets we should handle
5356 static int igb_poll(struct napi_struct *napi, int budget)
5358 struct igb_q_vector *q_vector = container_of(napi,
5359 struct igb_q_vector,
5360 napi);
5361 int tx_clean_complete = 1, work_done = 0;
5363 #ifdef CONFIG_IGB_DCA
5364 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5365 igb_update_dca(q_vector);
5366 #endif
5367 if (q_vector->tx_ring)
5368 tx_clean_complete = igb_clean_tx_irq(q_vector);
5370 if (q_vector->rx_ring)
5371 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5373 if (!tx_clean_complete)
5374 work_done = budget;
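/* Note added for clarity (not in the original source): claiming the whole
 * budget keeps this vector on the NAPI poll list, so the remaining Tx work
 * is retried on the next poll rather than waiting for another interrupt. */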
5376 /* If not enough Rx work done, exit the polling mode */
5377 if (work_done < budget) {
5378 napi_complete(napi);
5379 igb_ring_irq_enable(q_vector);
5382 return work_done;
5386 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5387 * @adapter: board private structure
5388 * @shhwtstamps: timestamp structure to update
5389 * @regval: unsigned 64bit system time value.
5391 * We need to convert the system time value stored in the RX/TXSTMP registers
5392 * into a hwtstamp which can be used by the upper level timestamping functions
5394 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5395 struct skb_shared_hwtstamps *shhwtstamps,
5396 u64 regval)
5398 u64 ns;
5401 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5402 * 24 to match clock shift we setup earlier.
5404 if (adapter->hw.mac.type == e1000_82580)
5405 regval <<= IGB_82580_TSYNC_SHIFT;
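/* Illustrative note (not in the original source): per the comment above
 * the shift is 24, so a raw 82580 reading of 1 (i.e. 1 ns) becomes 1 << 24
 * in the units the timecounter/cyclecounter pair was configured with. */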
5407 ns = timecounter_cyc2time(&adapter->clock, regval);
5408 timecompare_update(&adapter->compare, ns);
5409 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5410 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5411 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5415 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5416 * @q_vector: pointer to q_vector containing needed info
5417 * @buffer: pointer to igb_buffer structure
5419 * If we were asked to do hardware stamping and such a time stamp is
5420 * available, then it must have been for this skb here because we only
5421 * allow one such packet into the queue.
5423 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5425 struct igb_adapter *adapter = q_vector->adapter;
5426 struct e1000_hw *hw = &adapter->hw;
5427 struct skb_shared_hwtstamps shhwtstamps;
5428 u64 regval;
5430 /* if skb does not support hw timestamp or TX stamp not valid exit */
5431 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5432 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5433 return;
5435 regval = rd32(E1000_TXSTMPL);
5436 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5438 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5439 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5443 * igb_clean_tx_irq - Reclaim resources after transmit completes
5444 * @q_vector: pointer to q_vector containing needed info
5445 * returns true if ring is completely cleaned
5447 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5449 struct igb_adapter *adapter = q_vector->adapter;
5450 struct igb_ring *tx_ring = q_vector->tx_ring;
5451 struct net_device *netdev = tx_ring->netdev;
5452 struct e1000_hw *hw = &adapter->hw;
5453 struct igb_buffer *buffer_info;
5454 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5455 unsigned int total_bytes = 0, total_packets = 0;
5456 unsigned int i, eop, count = 0;
5457 bool cleaned = false;
5459 i = tx_ring->next_to_clean;
5460 eop = tx_ring->buffer_info[i].next_to_watch;
5461 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5463 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5464 (count < tx_ring->count)) {
5465 rmb(); /* read buffer_info after eop_desc status */
5466 for (cleaned = false; !cleaned; count++) {
5467 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5468 buffer_info = &tx_ring->buffer_info[i];
5469 cleaned = (i == eop);
5471 if (buffer_info->skb) {
5472 total_bytes += buffer_info->bytecount;
5473 /* gso_segs is currently only valid for tcp */
5474 total_packets += buffer_info->gso_segs;
5475 igb_tx_hwtstamp(q_vector, buffer_info);
5478 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5479 tx_desc->wb.status = 0;
5481 i++;
5482 if (i == tx_ring->count)
5483 i = 0;
5485 eop = tx_ring->buffer_info[i].next_to_watch;
5486 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5489 tx_ring->next_to_clean = i;
5491 if (unlikely(count &&
5492 netif_carrier_ok(netdev) &&
5493 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5494 /* Make sure that anybody stopping the queue after this
5495 * sees the new next_to_clean.
5497 smp_mb();
5498 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5499 !(test_bit(__IGB_DOWN, &adapter->state))) {
5500 netif_wake_subqueue(netdev, tx_ring->queue_index);
5502 u64_stats_update_begin(&tx_ring->tx_syncp);
5503 tx_ring->tx_stats.restart_queue++;
5504 u64_stats_update_end(&tx_ring->tx_syncp);
5508 if (tx_ring->detect_tx_hung) {
5509 /* Detect a transmit hang in hardware, this serializes the
5510 * check with the clearing of time_stamp and movement of i */
5511 tx_ring->detect_tx_hung = false;
5512 if (tx_ring->buffer_info[i].time_stamp &&
5513 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5514 (adapter->tx_timeout_factor * HZ)) &&
5515 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5517 /* detected Tx unit hang */
5518 dev_err(tx_ring->dev,
5519 "Detected Tx Unit Hang\n"
5520 " Tx Queue <%d>\n"
5521 " TDH <%x>\n"
5522 " TDT <%x>\n"
5523 " next_to_use <%x>\n"
5524 " next_to_clean <%x>\n"
5525 "buffer_info[next_to_clean]\n"
5526 " time_stamp <%lx>\n"
5527 " next_to_watch <%x>\n"
5528 " jiffies <%lx>\n"
5529 " desc.status <%x>\n",
5530 tx_ring->queue_index,
5531 readl(tx_ring->head),
5532 readl(tx_ring->tail),
5533 tx_ring->next_to_use,
5534 tx_ring->next_to_clean,
5535 tx_ring->buffer_info[eop].time_stamp,
5536 eop,
5537 jiffies,
5538 eop_desc->wb.status);
5539 netif_stop_subqueue(netdev, tx_ring->queue_index);
5542 tx_ring->total_bytes += total_bytes;
5543 tx_ring->total_packets += total_packets;
5544 u64_stats_update_begin(&tx_ring->tx_syncp);
5545 tx_ring->tx_stats.bytes += total_bytes;
5546 tx_ring->tx_stats.packets += total_packets;
5547 u64_stats_update_end(&tx_ring->tx_syncp);
5548 return count < tx_ring->count;
5552 * igb_receive_skb - helper function to handle rx indications
5553 * @q_vector: structure containing interrupt and ring information
5554 * @skb: packet to send up
5555 * @vlan_tag: vlan tag for packet
5557 static void igb_receive_skb(struct igb_q_vector *q_vector,
5558 struct sk_buff *skb,
5559 u16 vlan_tag)
5561 struct igb_adapter *adapter = q_vector->adapter;
5563 if (vlan_tag && adapter->vlgrp)
5564 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5565 vlan_tag, skb);
5566 else
5567 napi_gro_receive(&q_vector->napi, skb);
5570 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5571 u32 status_err, struct sk_buff *skb)
5573 skb_checksum_none_assert(skb);
5575 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5576 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5577 (status_err & E1000_RXD_STAT_IXSM))
5578 return;
5580 /* TCP/UDP checksum error bit is set */
5581 if (status_err &
5582 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5584 * work around errata with sctp packets where the TCPE aka
5585 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5586 * packets, (aka let the stack check the crc32c)
5588 if ((skb->len == 60) &&
5589 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5590 u64_stats_update_begin(&ring->rx_syncp);
5591 ring->rx_stats.csum_err++;
5592 u64_stats_update_end(&ring->rx_syncp);
5594 /* let the stack verify checksum errors */
5595 return;
5597 /* It must be a TCP or UDP packet with a valid checksum */
5598 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5599 skb->ip_summed = CHECKSUM_UNNECESSARY;
5601 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5604 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5605 struct sk_buff *skb)
5607 struct igb_adapter *adapter = q_vector->adapter;
5608 struct e1000_hw *hw = &adapter->hw;
5609 u64 regval;
5612 * If this bit is set, then the RX registers contain the time stamp. No
5613 * other packet will be time stamped until we read these registers, so
5614 * read the registers to make them available again. Because only one
5615 * packet can be time stamped at a time, we know that the register
5616 * values must belong to this one here and therefore we don't need to
5617 * compare any of the additional attributes stored for it.
5619 * If nothing went wrong, then it should have a shared tx_flags that we
5620 * can turn into a skb_shared_hwtstamps.
5622 if (staterr & E1000_RXDADV_STAT_TSIP) {
5623 u32 *stamp = (u32 *)skb->data;
5624 regval = le32_to_cpu(*(stamp + 2));
5625 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5626 skb_pull(skb, IGB_TS_HDR_LEN);
5627 } else {
5628 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5629 return;
5631 regval = rd32(E1000_RXSTMPL);
5632 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5635 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5637 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5638 union e1000_adv_rx_desc *rx_desc)
5640 /* HW will not DMA in data larger than the given buffer, even if it
5641 * parses the (NFS, of course) header to be larger. In that case, it
5642 * fills the header buffer and spills the rest into the page.
5644 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5645 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5646 if (hlen > rx_ring->rx_buffer_len)
5647 hlen = rx_ring->rx_buffer_len;
5648 return hlen;
5651 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5652 int *work_done, int budget)
5654 struct igb_ring *rx_ring = q_vector->rx_ring;
5655 struct net_device *netdev = rx_ring->netdev;
5656 struct device *dev = rx_ring->dev;
5657 union e1000_adv_rx_desc *rx_desc, *next_rxd;
5658 struct igb_buffer *buffer_info, *next_buffer;
5659 struct sk_buff *skb;
5660 bool cleaned = false;
5661 int cleaned_count = 0;
5662 int current_node = numa_node_id();
5663 unsigned int total_bytes = 0, total_packets = 0;
5664 unsigned int i;
5665 u32 staterr;
5666 u16 length;
5667 u16 vlan_tag;
5669 i = rx_ring->next_to_clean;
5670 buffer_info = &rx_ring->buffer_info[i];
5671 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5672 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5674 while (staterr & E1000_RXD_STAT_DD) {
5675 if (*work_done >= budget)
5676 break;
5677 (*work_done)++;
5678 rmb(); /* read descriptor and rx_buffer_info after status DD */
5680 skb = buffer_info->skb;
5681 prefetch(skb->data - NET_IP_ALIGN);
5682 buffer_info->skb = NULL;
5684 i++;
5685 if (i == rx_ring->count)
5686 i = 0;
5688 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5689 prefetch(next_rxd);
5690 next_buffer = &rx_ring->buffer_info[i];
5692 length = le16_to_cpu(rx_desc->wb.upper.length);
5693 cleaned = true;
5694 cleaned_count++;
5696 if (buffer_info->dma) {
5697 dma_unmap_single(dev, buffer_info->dma,
5698 rx_ring->rx_buffer_len,
5699 DMA_FROM_DEVICE);
5700 buffer_info->dma = 0;
5701 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5702 skb_put(skb, length);
5703 goto send_up;
5705 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5708 if (length) {
5709 dma_unmap_page(dev, buffer_info->page_dma,
5710 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5711 buffer_info->page_dma = 0;
5713 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5714 buffer_info->page,
5715 buffer_info->page_offset,
5716 length);
5718 if ((page_count(buffer_info->page) != 1) ||
5719 (page_to_nid(buffer_info->page) != current_node))
5720 buffer_info->page = NULL;
5721 else
5722 get_page(buffer_info->page);
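/* Note for clarity (not in the original source): the half-page is kept for
 * reuse only when nobody else holds a reference and it is local to this
 * NUMA node; otherwise it is dropped and a fresh page is mapped on refill. */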
5724 skb->len += length;
5725 skb->data_len += length;
5726 skb->truesize += length;
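/* packet continues in the next descriptor; pass the in-progress skb
 * along so the next iteration can keep filling it */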
5729 if (!(staterr & E1000_RXD_STAT_EOP)) {
5730 buffer_info->skb = next_buffer->skb;
5731 buffer_info->dma = next_buffer->dma;
5732 next_buffer->skb = skb;
5733 next_buffer->dma = 0;
5734 goto next_desc;
5736 send_up:
5737 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5738 dev_kfree_skb_irq(skb);
5739 goto next_desc;
5742 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5743 igb_rx_hwtstamp(q_vector, staterr, skb);
5744 total_bytes += skb->len;
5745 total_packets++;
5747 igb_rx_checksum_adv(rx_ring, staterr, skb);
5749 skb->protocol = eth_type_trans(skb, netdev);
5750 skb_record_rx_queue(skb, rx_ring->queue_index);
5752 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5753 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5755 igb_receive_skb(q_vector, skb, vlan_tag);
5757 next_desc:
5758 rx_desc->wb.upper.status_error = 0;
5760 /* return some buffers to hardware, one at a time is too slow */
5761 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5762 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5763 cleaned_count = 0;
5766 /* use prefetched values */
5767 rx_desc = next_rxd;
5768 buffer_info = next_buffer;
5769 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5772 rx_ring->next_to_clean = i;
5773 cleaned_count = igb_desc_unused(rx_ring);
5775 if (cleaned_count)
5776 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5778 rx_ring->total_packets += total_packets;
5779 rx_ring->total_bytes += total_bytes;
5780 u64_stats_update_begin(&rx_ring->rx_syncp);
5781 rx_ring->rx_stats.packets += total_packets;
5782 rx_ring->rx_stats.bytes += total_bytes;
5783 u64_stats_update_end(&rx_ring->rx_syncp);
5784 return cleaned;
5788 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5789 * @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
5791 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5793 struct net_device *netdev = rx_ring->netdev;
5794 union e1000_adv_rx_desc *rx_desc;
5795 struct igb_buffer *buffer_info;
5796 struct sk_buff *skb;
5797 unsigned int i;
5798 int bufsz;
5800 i = rx_ring->next_to_use;
5801 buffer_info = &rx_ring->buffer_info[i];
5803 bufsz = rx_ring->rx_buffer_len;
5805 while (cleaned_count--) {
5806 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
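/* buffer sizes below 1K use packet split: the header lands in the
 * skb and the payload in a half-page fragment */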
5808 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5809 if (!buffer_info->page) {
5810 buffer_info->page = netdev_alloc_page(netdev);
5811 if (unlikely(!buffer_info->page)) {
5812 u64_stats_update_begin(&rx_ring->rx_syncp);
5813 rx_ring->rx_stats.alloc_failed++;
5814 u64_stats_update_end(&rx_ring->rx_syncp);
5815 goto no_buffers;
5817 buffer_info->page_offset = 0;
5818 } else {
5819 buffer_info->page_offset ^= PAGE_SIZE / 2;
5821 buffer_info->page_dma =
5822 dma_map_page(rx_ring->dev, buffer_info->page,
5823 buffer_info->page_offset,
5824 PAGE_SIZE / 2,
5825 DMA_FROM_DEVICE);
5826 if (dma_mapping_error(rx_ring->dev,
5827 buffer_info->page_dma)) {
5828 buffer_info->page_dma = 0;
5829 u64_stats_update_begin(&rx_ring->rx_syncp);
5830 rx_ring->rx_stats.alloc_failed++;
5831 u64_stats_update_end(&rx_ring->rx_syncp);
5832 goto no_buffers;
5836 skb = buffer_info->skb;
5837 if (!skb) {
5838 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5839 if (unlikely(!skb)) {
5840 u64_stats_update_begin(&rx_ring->rx_syncp);
5841 rx_ring->rx_stats.alloc_failed++;
5842 u64_stats_update_end(&rx_ring->rx_syncp);
5843 goto no_buffers;
5846 buffer_info->skb = skb;
5848 if (!buffer_info->dma) {
5849 buffer_info->dma = dma_map_single(rx_ring->dev,
5850 skb->data,
5851 bufsz,
5852 DMA_FROM_DEVICE);
5853 if (dma_mapping_error(rx_ring->dev,
5854 buffer_info->dma)) {
5855 buffer_info->dma = 0;
5856 u64_stats_update_begin(&rx_ring->rx_syncp);
5857 rx_ring->rx_stats.alloc_failed++;
5858 u64_stats_update_end(&rx_ring->rx_syncp);
5859 goto no_buffers;
5862 /* Refresh the desc even if buffer_addrs didn't change because
5863 * each write-back erases this info. */
5864 if (bufsz < IGB_RXBUFFER_1024) {
5865 rx_desc->read.pkt_addr =
5866 cpu_to_le64(buffer_info->page_dma);
5867 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5868 } else {
5869 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5870 rx_desc->read.hdr_addr = 0;
5873 i++;
5874 if (i == rx_ring->count)
5875 i = 0;
5876 buffer_info = &rx_ring->buffer_info[i];
5879 no_buffers:
5880 if (rx_ring->next_to_use != i) {
5881 rx_ring->next_to_use = i;
5882 if (i == 0)
5883 i = (rx_ring->count - 1);
5884 else
5885 i--;
5887 /* Force memory writes to complete before letting h/w
5888 * know there are new descriptors to fetch. (Only
5889 * applicable for weak-ordered memory model archs,
5890 * such as IA-64). */
5891 wmb();
5892 writel(i, rx_ring->tail);
5897 * igb_mii_ioctl - handle MII ioctl requests
5898 * @netdev: network interface device structure
5899 * @ifr: ifreq structure carrying the MII register data
5900 * @cmd: MII ioctl command (SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG)
5902 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5904 struct igb_adapter *adapter = netdev_priv(netdev);
5905 struct mii_ioctl_data *data = if_mii(ifr);
5907 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5908 return -EOPNOTSUPP;
5910 switch (cmd) {
5911 case SIOCGMIIPHY:
5912 data->phy_id = adapter->hw.phy.addr;
5913 break;
5914 case SIOCGMIIREG:
5915 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5916 &data->val_out))
5917 return -EIO;
5918 break;
5919 case SIOCSMIIREG:
5920 default:
5921 return -EOPNOTSUPP;
5923 return 0;
5927 * igb_hwtstamp_ioctl - control hardware time stamping
5928 * @netdev: network interface device structure
5929 * @ifr: ifreq structure carrying the hwtstamp_config from user space
5930 * @cmd: ioctl command (SIOCSHWTSTAMP)
5932 * Outgoing time stamping can be enabled and disabled. Play nice and
5933 * disable it when requested, although it shouldn't cause any overhead
5934 * when no packet needs it. At most one packet in the queue may be
5935 * marked for time stamping, otherwise it would be impossible to tell
5936 * for sure to which packet the hardware time stamp belongs.
5938 * Incoming time stamping has to be configured via the hardware
5939 * filters. Not all combinations are supported, in particular event
5940 * type has to be specified. Matching the kind of event packet is
5941 * not supported, with the exception of "all V2 events regardless of
5942 * layer 2 or 4".
5945 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5946 struct ifreq *ifr, int cmd)
5948 struct igb_adapter *adapter = netdev_priv(netdev);
5949 struct e1000_hw *hw = &adapter->hw;
5950 struct hwtstamp_config config;
5951 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5952 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5953 u32 tsync_rx_cfg = 0;
5954 bool is_l4 = false;
5955 bool is_l2 = false;
5956 u32 regval;
5958 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5959 return -EFAULT;
5961 /* reserved for future extensions */
5962 if (config.flags)
5963 return -EINVAL;
5965 switch (config.tx_type) {
5966 case HWTSTAMP_TX_OFF:
5967 tsync_tx_ctl = 0;
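/* fall through */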
5968 case HWTSTAMP_TX_ON:
5969 break;
5970 default:
5971 return -ERANGE;
5974 switch (config.rx_filter) {
5975 case HWTSTAMP_FILTER_NONE:
5976 tsync_rx_ctl = 0;
5977 break;
5978 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5979 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5980 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5981 case HWTSTAMP_FILTER_ALL:
5983 * the TSYNCRXCFG register can select only one message type, so it is
5984 * not possible to time stamp both Sync and Delay_Req messages
5985 * => fall back to time stamping all packets
5987 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5988 config.rx_filter = HWTSTAMP_FILTER_ALL;
5989 break;
5990 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5991 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5992 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5993 is_l4 = true;
5994 break;
5995 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5996 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5997 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5998 is_l4 = true;
5999 break;
6000 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6001 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6002 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6003 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6004 is_l2 = true;
6005 is_l4 = true;
6006 config.rx_filter = HWTSTAMP_FILTER_SOME;
6007 break;
6008 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6009 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6010 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6011 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6012 is_l2 = true;
6013 is_l4 = true;
6014 config.rx_filter = HWTSTAMP_FILTER_SOME;
6015 break;
6016 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6017 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6018 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6019 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6020 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6021 is_l2 = true;
6022 break;
6023 default:
6024 return -ERANGE;
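/* hardware time stamping is not supported on the 82575; reject any
 * attempt to enable it */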
6027 if (hw->mac.type == e1000_82575) {
6028 if (tsync_rx_ctl | tsync_tx_ctl)
6029 return -EINVAL;
6030 return 0;
6034 * Per-packet timestamping only works if all packets are
6035 * timestamped, so enable timestamping in all packets as
6036 * long as one rx filter was configured.
6038 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6039 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6040 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6043 /* enable/disable TX */
6044 regval = rd32(E1000_TSYNCTXCTL);
6045 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6046 regval |= tsync_tx_ctl;
6047 wr32(E1000_TSYNCTXCTL, regval);
6049 /* enable/disable RX */
6050 regval = rd32(E1000_TSYNCRXCTL);
6051 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6052 regval |= tsync_rx_ctl;
6053 wr32(E1000_TSYNCRXCTL, regval);
6055 /* define which PTP packets are time stamped */
6056 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6058 /* define ethertype filter for timestamped packets */
6059 if (is_l2)
6060 wr32(E1000_ETQF(3),
6061 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6062 E1000_ETQF_1588 | /* enable timestamping */
6063 ETH_P_1588)); /* 1588 eth protocol type */
6064 else
6065 wr32(E1000_ETQF(3), 0);
6067 #define PTP_PORT 319 /* UDP port used for PTP event messages */
6068 /* L4 Queue Filter[3]: filter by destination port and protocol */
6069 if (is_l4) {
6070 u32 ftqf = (IPPROTO_UDP /* UDP */
6071 | E1000_FTQF_VF_BP /* VF not compared */
6072 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6073 | E1000_FTQF_MASK); /* mask all inputs */
6074 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6076 wr32(E1000_IMIR(3), htons(PTP_PORT));
6077 wr32(E1000_IMIREXT(3),
6078 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6079 if (hw->mac.type == e1000_82576) {
6080 /* enable source port check */
6081 wr32(E1000_SPQF(3), htons(PTP_PORT));
6082 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6084 wr32(E1000_FTQF(3), ftqf);
6085 } else {
6086 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6088 wrfl();
6090 adapter->hwtstamp_config = config;
6092 /* clear TX/RX time stamp registers, just to be sure */
6093 regval = rd32(E1000_TXSTMPH);
6094 regval = rd32(E1000_RXSTMPH);
6096 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6097 -EFAULT : 0;
6101 * igb_ioctl - handle device-specific ioctl requests
6102 * @netdev: network interface device structure
6103 * @ifr: ifreq structure passed in by the caller
6104 * @cmd: ioctl command to execute
6106 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6108 switch (cmd) {
6109 case SIOCGMIIPHY:
6110 case SIOCGMIIREG:
6111 case SIOCSMIIREG:
6112 return igb_mii_ioctl(netdev, ifr, cmd);
6113 case SIOCSHWTSTAMP:
6114 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6115 default:
6116 return -EOPNOTSUPP;
6120 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6122 struct igb_adapter *adapter = hw->back;
6123 u16 cap_offset;
6125 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6126 if (!cap_offset)
6127 return -E1000_ERR_CONFIG;
6129 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6131 return 0;
6134 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6136 struct igb_adapter *adapter = hw->back;
6137 u16 cap_offset;
6139 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6140 if (!cap_offset)
6141 return -E1000_ERR_CONFIG;
6143 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6145 return 0;
6148 static void igb_vlan_rx_register(struct net_device *netdev,
6149 struct vlan_group *grp)
6151 struct igb_adapter *adapter = netdev_priv(netdev);
6152 struct e1000_hw *hw = &adapter->hw;
6153 u32 ctrl, rctl;
6155 igb_irq_disable(adapter);
6156 adapter->vlgrp = grp;
6158 if (grp) {
6159 /* enable VLAN tag insert/strip */
6160 ctrl = rd32(E1000_CTRL);
6161 ctrl |= E1000_CTRL_VME;
6162 wr32(E1000_CTRL, ctrl);
6164 /* Disable CFI check */
6165 rctl = rd32(E1000_RCTL);
6166 rctl &= ~E1000_RCTL_CFIEN;
6167 wr32(E1000_RCTL, rctl);
6168 } else {
6169 /* disable VLAN tag insert/strip */
6170 ctrl = rd32(E1000_CTRL);
6171 ctrl &= ~E1000_CTRL_VME;
6172 wr32(E1000_CTRL, ctrl);
6175 igb_rlpml_set(adapter);
6177 if (!test_bit(__IGB_DOWN, &adapter->state))
6178 igb_irq_enable(adapter);
6181 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6183 struct igb_adapter *adapter = netdev_priv(netdev);
6184 struct e1000_hw *hw = &adapter->hw;
6185 int pf_id = adapter->vfs_allocated_count;
6187 /* attempt to add filter to vlvf array */
6188 igb_vlvf_set(adapter, vid, true, pf_id);
6190 /* add the filter since PF can receive vlans w/o entry in vlvf */
6191 igb_vfta_set(hw, vid, true);
6194 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6196 struct igb_adapter *adapter = netdev_priv(netdev);
6197 struct e1000_hw *hw = &adapter->hw;
6198 int pf_id = adapter->vfs_allocated_count;
6199 s32 err;
6201 igb_irq_disable(adapter);
6202 vlan_group_set_device(adapter->vlgrp, vid, NULL);
6204 if (!test_bit(__IGB_DOWN, &adapter->state))
6205 igb_irq_enable(adapter);
6207 /* remove vlan from VLVF table array */
6208 err = igb_vlvf_set(adapter, vid, false, pf_id);
6210 /* if vid was not present in VLVF just remove it from table */
6211 if (err)
6212 igb_vfta_set(hw, vid, false);
6215 static void igb_restore_vlan(struct igb_adapter *adapter)
6217 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6219 if (adapter->vlgrp) {
6220 u16 vid;
6221 for (vid = 0; vid < VLAN_N_VID; vid++) {
6222 if (!vlan_group_get_device(adapter->vlgrp, vid))
6223 continue;
6224 igb_vlan_rx_add_vid(adapter->netdev, vid);
6229 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6231 struct pci_dev *pdev = adapter->pdev;
6232 struct e1000_mac_info *mac = &adapter->hw.mac;
6234 mac->autoneg = 0;
6236 /* Fiber NICs only allow 1000 Mbps full duplex */
6237 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6238 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6239 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6240 return -EINVAL;
6243 switch (spddplx) {
6244 case SPEED_10 + DUPLEX_HALF:
6245 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6246 break;
6247 case SPEED_10 + DUPLEX_FULL:
6248 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6249 break;
6250 case SPEED_100 + DUPLEX_HALF:
6251 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6252 break;
6253 case SPEED_100 + DUPLEX_FULL:
6254 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6255 break;
6256 case SPEED_1000 + DUPLEX_FULL:
6257 mac->autoneg = 1;
6258 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6259 break;
6260 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6261 default:
6262 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6263 return -EINVAL;
6265 return 0;
6268 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6270 struct net_device *netdev = pci_get_drvdata(pdev);
6271 struct igb_adapter *adapter = netdev_priv(netdev);
6272 struct e1000_hw *hw = &adapter->hw;
6273 u32 ctrl, rctl, status;
6274 u32 wufc = adapter->wol;
6275 #ifdef CONFIG_PM
6276 int retval = 0;
6277 #endif
6279 netif_device_detach(netdev);
6281 if (netif_running(netdev))
6282 igb_close(netdev);
6284 igb_clear_interrupt_scheme(adapter);
6286 #ifdef CONFIG_PM
6287 retval = pci_save_state(pdev);
6288 if (retval)
6289 return retval;
6290 #endif
6292 status = rd32(E1000_STATUS);
6293 if (status & E1000_STATUS_LU)
6294 wufc &= ~E1000_WUFC_LNKC;
6296 if (wufc) {
6297 igb_setup_rctl(adapter);
6298 igb_set_rx_mode(netdev);
6300 /* turn on all-multi mode if wake on multicast is enabled */
6301 if (wufc & E1000_WUFC_MC) {
6302 rctl = rd32(E1000_RCTL);
6303 rctl |= E1000_RCTL_MPE;
6304 wr32(E1000_RCTL, rctl);
6307 ctrl = rd32(E1000_CTRL);
6308 /* advertise wake from D3Cold */
6309 #define E1000_CTRL_ADVD3WUC 0x00100000
6310 /* phy power management enable */
6311 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6312 ctrl |= E1000_CTRL_ADVD3WUC;
6313 wr32(E1000_CTRL, ctrl);
6315 /* Allow time for pending master requests to run */
6316 igb_disable_pcie_master(hw);
6318 wr32(E1000_WUC, E1000_WUC_PME_EN);
6319 wr32(E1000_WUFC, wufc);
6320 } else {
6321 wr32(E1000_WUC, 0);
6322 wr32(E1000_WUFC, 0);
6325 *enable_wake = wufc || adapter->en_mng_pt;
6326 if (!*enable_wake)
6327 igb_power_down_link(adapter);
6328 else
6329 igb_power_up_link(adapter);
6331 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6332 * would have already happened in close and is redundant. */
6333 igb_release_hw_control(adapter);
6335 pci_disable_device(pdev);
6337 return 0;
6340 #ifdef CONFIG_PM
6341 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6343 int retval;
6344 bool wake;
6346 retval = __igb_shutdown(pdev, &wake);
6347 if (retval)
6348 return retval;
6350 if (wake) {
6351 pci_prepare_to_sleep(pdev);
6352 } else {
6353 pci_wake_from_d3(pdev, false);
6354 pci_set_power_state(pdev, PCI_D3hot);
6357 return 0;
6360 static int igb_resume(struct pci_dev *pdev)
6362 struct net_device *netdev = pci_get_drvdata(pdev);
6363 struct igb_adapter *adapter = netdev_priv(netdev);
6364 struct e1000_hw *hw = &adapter->hw;
6365 int err;
6367 pci_set_power_state(pdev, PCI_D0);
6368 pci_restore_state(pdev);
6369 pci_save_state(pdev);
6371 err = pci_enable_device_mem(pdev);
6372 if (err) {
6373 dev_err(&pdev->dev,
6374 "igb: Cannot enable PCI device from suspend\n");
6375 return err;
6377 pci_set_master(pdev);
6379 pci_enable_wake(pdev, PCI_D3hot, 0);
6380 pci_enable_wake(pdev, PCI_D3cold, 0);
6382 if (igb_init_interrupt_scheme(adapter)) {
6383 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6384 return -ENOMEM;
6387 igb_reset(adapter);
6389 /* let the f/w know that the h/w is now under the control of the
6390 * driver. */
6391 igb_get_hw_control(adapter);
6393 wr32(E1000_WUS, ~0);
6395 if (netif_running(netdev)) {
6396 err = igb_open(netdev);
6397 if (err)
6398 return err;
6401 netif_device_attach(netdev);
6403 return 0;
6405 #endif
6407 static void igb_shutdown(struct pci_dev *pdev)
6409 bool wake;
6411 __igb_shutdown(pdev, &wake);
6413 if (system_state == SYSTEM_POWER_OFF) {
6414 pci_wake_from_d3(pdev, wake);
6415 pci_set_power_state(pdev, PCI_D3hot);
6419 #ifdef CONFIG_NET_POLL_CONTROLLER
6421 * Polling 'interrupt' - used by things like netconsole to send skbs
6422 * without having to re-enable interrupts. It's not called while
6423 * the interrupt routine is executing.
6425 static void igb_netpoll(struct net_device *netdev)
6427 struct igb_adapter *adapter = netdev_priv(netdev);
6428 struct e1000_hw *hw = &adapter->hw;
6429 int i;
6431 if (!adapter->msix_entries) {
6432 struct igb_q_vector *q_vector = adapter->q_vector[0];
6433 igb_irq_disable(adapter);
6434 napi_schedule(&q_vector->napi);
6435 return;
6438 for (i = 0; i < adapter->num_q_vectors; i++) {
6439 struct igb_q_vector *q_vector = adapter->q_vector[i];
6440 wr32(E1000_EIMC, q_vector->eims_value);
6441 napi_schedule(&q_vector->napi);
6444 #endif /* CONFIG_NET_POLL_CONTROLLER */
6447 * igb_io_error_detected - called when PCI error is detected
6448 * @pdev: Pointer to PCI device
6449 * @state: The current pci connection state
6451 * This function is called after a PCI bus error affecting
6452 * this device has been detected.
6454 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6455 pci_channel_state_t state)
6457 struct net_device *netdev = pci_get_drvdata(pdev);
6458 struct igb_adapter *adapter = netdev_priv(netdev);
6460 netif_device_detach(netdev);
6462 if (state == pci_channel_io_perm_failure)
6463 return PCI_ERS_RESULT_DISCONNECT;
6465 if (netif_running(netdev))
6466 igb_down(adapter);
6467 pci_disable_device(pdev);
6469 /* Request a slot reset. */
6470 return PCI_ERS_RESULT_NEED_RESET;
6474 * igb_io_slot_reset - called after the pci bus has been reset.
6475 * @pdev: Pointer to PCI device
6477 * Restart the card from scratch, as if from a cold boot. Implementation
6478 * resembles the first half of the igb_resume routine.
6480 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6482 struct net_device *netdev = pci_get_drvdata(pdev);
6483 struct igb_adapter *adapter = netdev_priv(netdev);
6484 struct e1000_hw *hw = &adapter->hw;
6485 pci_ers_result_t result;
6486 int err;
6488 if (pci_enable_device_mem(pdev)) {
6489 dev_err(&pdev->dev,
6490 "Cannot re-enable PCI device after reset.\n");
6491 result = PCI_ERS_RESULT_DISCONNECT;
6492 } else {
6493 pci_set_master(pdev);
6494 pci_restore_state(pdev);
6495 pci_save_state(pdev);
6497 pci_enable_wake(pdev, PCI_D3hot, 0);
6498 pci_enable_wake(pdev, PCI_D3cold, 0);
6500 igb_reset(adapter);
6501 wr32(E1000_WUS, ~0);
6502 result = PCI_ERS_RESULT_RECOVERED;
6505 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6506 if (err) {
6507 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6508 "failed 0x%0x\n", err);
6509 /* non-fatal, continue */
6512 return result;
6516 * igb_io_resume - called when traffic can start flowing again.
6517 * @pdev: Pointer to PCI device
6519 * This callback is called when the error recovery driver tells us that
6520 * it's OK to resume normal operation. Implementation resembles the
6521 * second half of the igb_resume routine.
6523 static void igb_io_resume(struct pci_dev *pdev)
6525 struct net_device *netdev = pci_get_drvdata(pdev);
6526 struct igb_adapter *adapter = netdev_priv(netdev);
6528 if (netif_running(netdev)) {
6529 if (igb_up(adapter)) {
6530 dev_err(&pdev->dev, "igb_up failed after reset\n");
6531 return;
6535 netif_device_attach(netdev);
6537 /* let the f/w know that the h/w is now under the control of the
6538 * driver. */
6539 igb_get_hw_control(adapter);
6542 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6543 u8 qsel)
6545 u32 rar_low, rar_high;
6546 struct e1000_hw *hw = &adapter->hw;
6548 /* HW expects these in little endian so we reverse the byte order
6549 * from network order (big endian) to little endian
6551 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6552 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6553 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6555 /* Indicate to hardware the Address is Valid. */
6556 rar_high |= E1000_RAH_AV;
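/* 82575 stores the pool number directly in the RAH pool field;
 * later MACs use a one-bit-per-pool bitmask, hence the shift */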
6558 if (hw->mac.type == e1000_82575)
6559 rar_high |= E1000_RAH_POOL_1 * qsel;
6560 else
6561 rar_high |= E1000_RAH_POOL_1 << qsel;
6563 wr32(E1000_RAL(index), rar_low);
6564 wrfl();
6565 wr32(E1000_RAH(index), rar_high);
6566 wrfl();
6569 static int igb_set_vf_mac(struct igb_adapter *adapter,
6570 int vf, unsigned char *mac_addr)
6572 struct e1000_hw *hw = &adapter->hw;
6573 /* VF MAC addresses start at the end of the receive address registers
6574 * and move towards the first, so a collision should not be possible */
6575 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6577 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6579 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6581 return 0;
6584 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6586 struct igb_adapter *adapter = netdev_priv(netdev);
6587 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6588 return -EINVAL;
6589 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6590 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6591 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6592 " change effective.");
6593 if (test_bit(__IGB_DOWN, &adapter->state)) {
6594 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6595 " but the PF device is not up.\n");
6596 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6597 " attempting to use the VF device.\n");
6599 return igb_set_vf_mac(adapter, vf, mac);
6602 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6604 return -EOPNOTSUPP;
6607 static int igb_ndo_get_vf_config(struct net_device *netdev,
6608 int vf, struct ifla_vf_info *ivi)
6610 struct igb_adapter *adapter = netdev_priv(netdev);
6611 if (vf >= adapter->vfs_allocated_count)
6612 return -EINVAL;
6613 ivi->vf = vf;
6614 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6615 ivi->tx_rate = 0;
6616 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6617 ivi->qos = adapter->vf_data[vf].pf_qos;
6618 return 0;
6621 static void igb_vmm_control(struct igb_adapter *adapter)
6623 struct e1000_hw *hw = &adapter->hw;
6624 u32 reg;
6626 switch (hw->mac.type) {
6627 case e1000_82575:
6628 default:
6629 /* replication is not supported for 82575 */
6630 return;
6631 case e1000_82576:
6632 /* notify HW that the MAC is adding vlan tags */
6633 reg = rd32(E1000_DTXCTL);
6634 reg |= E1000_DTXCTL_VLAN_ADDED;
6635 wr32(E1000_DTXCTL, reg);
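/* fall through */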
6636 case e1000_82580:
6637 /* enable replication vlan tag stripping */
6638 reg = rd32(E1000_RPLOLR);
6639 reg |= E1000_RPLOLR_STRVLAN;
6640 wr32(E1000_RPLOLR, reg);
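/* fall through */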
6641 case e1000_i350:
6642 /* none of the above registers are supported by i350 */
6643 break;
6646 if (adapter->vfs_allocated_count) {
6647 igb_vmdq_set_loopback_pf(hw, true);
6648 igb_vmdq_set_replication_pf(hw, true);
6649 igb_vmdq_set_anti_spoofing_pf(hw, true,
6650 adapter->vfs_allocated_count);
6651 } else {
6652 igb_vmdq_set_loopback_pf(hw, false);
6653 igb_vmdq_set_replication_pf(hw, false);
6657 /* igb_main.c */