igb: fix warning about unused function
linux-2.6.git: drivers/net/ethernet/intel/igb/igb_main.c
1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2012 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
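/* i.e. DRV_VERSION evaluates to "3.2.10-k" */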
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 /* required last entry */
104 {0, }
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
175 #ifdef CONFIG_PM
176 #ifdef CONFIG_PM_SLEEP
177 static int igb_suspend(struct device *);
178 #endif
179 static int igb_resume(struct device *);
180 #ifdef CONFIG_PM_RUNTIME
181 static int igb_runtime_suspend(struct device *dev);
182 static int igb_runtime_resume(struct device *dev);
183 static int igb_runtime_idle(struct device *dev);
184 #endif
185 static const struct dev_pm_ops igb_pm_ops = {
186 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188 igb_runtime_idle)
190 #endif
191 static void igb_shutdown(struct pci_dev *);
192 #ifdef CONFIG_IGB_DCA
193 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194 static struct notifier_block dca_notifier = {
195 .notifier_call = igb_notify_dca,
196 .next = NULL,
197 .priority = 0
199 #endif
200 #ifdef CONFIG_NET_POLL_CONTROLLER
201 /* for netdump / net console */
202 static void igb_netpoll(struct net_device *);
203 #endif
204 #ifdef CONFIG_PCI_IOV
205 static unsigned int max_vfs = 0;
206 module_param(max_vfs, uint, 0);
207 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208 "per physical function");
209 #endif /* CONFIG_PCI_IOV */
211 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212 pci_channel_state_t);
213 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214 static void igb_io_resume(struct pci_dev *);
216 static struct pci_error_handlers igb_err_handler = {
217 .error_detected = igb_io_error_detected,
218 .slot_reset = igb_io_slot_reset,
219 .resume = igb_io_resume,
222 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
224 static struct pci_driver igb_driver = {
225 .name = igb_driver_name,
226 .id_table = igb_pci_tbl,
227 .probe = igb_probe,
228 .remove = __devexit_p(igb_remove),
229 #ifdef CONFIG_PM
230 .driver.pm = &igb_pm_ops,
231 #endif
232 .shutdown = igb_shutdown,
233 .err_handler = &igb_err_handler
236 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238 MODULE_LICENSE("GPL");
239 MODULE_VERSION(DRV_VERSION);
241 struct igb_reg_info {
242 u32 ofs;
243 char *name;
246 static const struct igb_reg_info igb_reg_info_tbl[] = {
248 /* General Registers */
249 {E1000_CTRL, "CTRL"},
250 {E1000_STATUS, "STATUS"},
251 {E1000_CTRL_EXT, "CTRL_EXT"},
253 /* Interrupt Registers */
254 {E1000_ICR, "ICR"},
256 /* RX Registers */
257 {E1000_RCTL, "RCTL"},
258 {E1000_RDLEN(0), "RDLEN"},
259 {E1000_RDH(0), "RDH"},
260 {E1000_RDT(0), "RDT"},
261 {E1000_RXDCTL(0), "RXDCTL"},
262 {E1000_RDBAL(0), "RDBAL"},
263 {E1000_RDBAH(0), "RDBAH"},
265 /* TX Registers */
266 {E1000_TCTL, "TCTL"},
267 {E1000_TDBAL(0), "TDBAL"},
268 {E1000_TDBAH(0), "TDBAH"},
269 {E1000_TDLEN(0), "TDLEN"},
270 {E1000_TDH(0), "TDH"},
271 {E1000_TDT(0), "TDT"},
272 {E1000_TXDCTL(0), "TXDCTL"},
273 {E1000_TDFH, "TDFH"},
274 {E1000_TDFT, "TDFT"},
275 {E1000_TDFHS, "TDFHS"},
276 {E1000_TDFPC, "TDFPC"},
278 /* List Terminator */
283 * igb_regdump - register printout routine
285 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
287 int n = 0;
288 char rname[16];
289 u32 regs[8];
291 switch (reginfo->ofs) {
292 case E1000_RDLEN(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_RDLEN(n));
295 break;
296 case E1000_RDH(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_RDH(n));
299 break;
300 case E1000_RDT(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_RDT(n));
303 break;
304 case E1000_RXDCTL(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_RXDCTL(n));
307 break;
308 case E1000_RDBAL(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_RDBAL(n));
311 break;
312 case E1000_RDBAH(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_RDBAH(n));
315 break;
316 case E1000_TDBAL(0):
317 for (n = 0; n < 4; n++)
318 regs[n] = rd32(E1000_TDBAL(n));
319 break;
320 case E1000_TDBAH(0):
321 for (n = 0; n < 4; n++)
322 regs[n] = rd32(E1000_TDBAH(n));
323 break;
324 case E1000_TDLEN(0):
325 for (n = 0; n < 4; n++)
326 regs[n] = rd32(E1000_TDLEN(n));
327 break;
328 case E1000_TDH(0):
329 for (n = 0; n < 4; n++)
330 regs[n] = rd32(E1000_TDH(n));
331 break;
332 case E1000_TDT(0):
333 for (n = 0; n < 4; n++)
334 regs[n] = rd32(E1000_TDT(n));
335 break;
336 case E1000_TXDCTL(0):
337 for (n = 0; n < 4; n++)
338 regs[n] = rd32(E1000_TXDCTL(n));
339 break;
340 default:
341 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
342 return;
345 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
346 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
347 regs[2], regs[3]);
351 * igb_dump - Print registers, tx-rings and rx-rings
353 static void igb_dump(struct igb_adapter *adapter)
355 struct net_device *netdev = adapter->netdev;
356 struct e1000_hw *hw = &adapter->hw;
357 struct igb_reg_info *reginfo;
358 struct igb_ring *tx_ring;
359 union e1000_adv_tx_desc *tx_desc;
360 struct my_u0 { u64 a; u64 b; } *u0;
361 struct igb_ring *rx_ring;
362 union e1000_adv_rx_desc *rx_desc;
363 u32 staterr;
364 u16 i, n;
366 if (!netif_msg_hw(adapter))
367 return;
369 /* Print netdevice Info */
370 if (netdev) {
371 dev_info(&adapter->pdev->dev, "Net device Info\n");
372 pr_info("Device Name state trans_start "
373 "last_rx\n");
374 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
375 netdev->state, netdev->trans_start, netdev->last_rx);
378 /* Print Registers */
379 dev_info(&adapter->pdev->dev, "Register Dump\n");
380 pr_info(" Register Name Value\n");
381 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
382 reginfo->name; reginfo++) {
383 igb_regdump(hw, reginfo);
386 /* Print TX Ring Summary */
387 if (!netdev || !netif_running(netdev))
388 goto exit;
390 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
391 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
392 for (n = 0; n < adapter->num_tx_queues; n++) {
393 struct igb_tx_buffer *buffer_info;
394 tx_ring = adapter->tx_ring[n];
395 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
396 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
397 n, tx_ring->next_to_use, tx_ring->next_to_clean,
398 (u64)buffer_info->dma,
399 buffer_info->length,
400 buffer_info->next_to_watch,
401 (u64)buffer_info->time_stamp);
404 /* Print TX Rings */
405 if (!netif_msg_tx_done(adapter))
406 goto rx_ring_summary;
408 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
410 /* Transmit Descriptor Formats
412 * Advanced Transmit Descriptor
413 * +--------------------------------------------------------------+
414 * 0 | Buffer Address [63:0] |
415 * +--------------------------------------------------------------+
416 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
417 * +--------------------------------------------------------------+
418 * 63 46 45 40 39 38 36 35 32 31 24 15 0
421 for (n = 0; n < adapter->num_tx_queues; n++) {
422 tx_ring = adapter->tx_ring[n];
423 pr_info("------------------------------------\n");
424 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
425 pr_info("------------------------------------\n");
426 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
427 "[bi->dma ] leng ntw timestamp "
428 "bi->skb\n");
430 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
431 const char *next_desc;
432 struct igb_tx_buffer *buffer_info;
433 tx_desc = IGB_TX_DESC(tx_ring, i);
434 buffer_info = &tx_ring->tx_buffer_info[i];
435 u0 = (struct my_u0 *)tx_desc;
436 if (i == tx_ring->next_to_use &&
437 i == tx_ring->next_to_clean)
438 next_desc = " NTC/U";
439 else if (i == tx_ring->next_to_use)
440 next_desc = " NTU";
441 else if (i == tx_ring->next_to_clean)
442 next_desc = " NTC";
443 else
444 next_desc = "";
446 pr_info("T [0x%03X] %016llX %016llX %016llX"
447 " %04X %p %016llX %p%s\n", i,
448 le64_to_cpu(u0->a),
449 le64_to_cpu(u0->b),
450 (u64)buffer_info->dma,
451 buffer_info->length,
452 buffer_info->next_to_watch,
453 (u64)buffer_info->time_stamp,
454 buffer_info->skb, next_desc);
456 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
457 print_hex_dump(KERN_INFO, "",
458 DUMP_PREFIX_ADDRESS,
459 16, 1, phys_to_virt(buffer_info->dma),
460 buffer_info->length, true);
464 /* Print RX Rings Summary */
465 rx_ring_summary:
466 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
467 pr_info("Queue [NTU] [NTC]\n");
468 for (n = 0; n < adapter->num_rx_queues; n++) {
469 rx_ring = adapter->rx_ring[n];
470 pr_info(" %5d %5X %5X\n",
471 n, rx_ring->next_to_use, rx_ring->next_to_clean);
474 /* Print RX Rings */
475 if (!netif_msg_rx_status(adapter))
476 goto exit;
478 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
480 /* Advanced Receive Descriptor (Read) Format
481 * 63 1 0
482 * +-----------------------------------------------------+
483 * 0 | Packet Buffer Address [63:1] |A0/NSE|
484 * +----------------------------------------------+------+
485 * 8 | Header Buffer Address [63:1] | DD |
486 * +-----------------------------------------------------+
489 * Advanced Receive Descriptor (Write-Back) Format
491 * 63 48 47 32 31 30 21 20 17 16 4 3 0
492 * +------------------------------------------------------+
493 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
494 * | Checksum Ident | | | | Type | Type |
495 * +------------------------------------------------------+
496 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
497 * +------------------------------------------------------+
498 * 63 48 47 32 31 20 19 0
501 for (n = 0; n < adapter->num_rx_queues; n++) {
502 rx_ring = adapter->rx_ring[n];
503 pr_info("------------------------------------\n");
504 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
505 pr_info("------------------------------------\n");
506 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
507 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
508 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
509 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
511 for (i = 0; i < rx_ring->count; i++) {
512 const char *next_desc;
513 struct igb_rx_buffer *buffer_info;
514 buffer_info = &rx_ring->rx_buffer_info[i];
515 rx_desc = IGB_RX_DESC(rx_ring, i);
516 u0 = (struct my_u0 *)rx_desc;
517 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
519 if (i == rx_ring->next_to_use)
520 next_desc = " NTU";
521 else if (i == rx_ring->next_to_clean)
522 next_desc = " NTC";
523 else
524 next_desc = "";
526 if (staterr & E1000_RXD_STAT_DD) {
527 /* Descriptor Done */
528 pr_info("%s[0x%03X] %016llX %016llX -------"
529 "--------- %p%s\n", "RWB", i,
530 le64_to_cpu(u0->a),
531 le64_to_cpu(u0->b),
532 buffer_info->skb, next_desc);
533 } else {
534 pr_info("%s[0x%03X] %016llX %016llX %016llX"
535 " %p%s\n", "R ", i,
536 le64_to_cpu(u0->a),
537 le64_to_cpu(u0->b),
538 (u64)buffer_info->dma,
539 buffer_info->skb, next_desc);
541 if (netif_msg_pktdata(adapter)) {
542 print_hex_dump(KERN_INFO, "",
543 DUMP_PREFIX_ADDRESS,
544 16, 1,
545 phys_to_virt(buffer_info->dma),
546 IGB_RX_HDR_LEN, true);
547 print_hex_dump(KERN_INFO, "",
548 DUMP_PREFIX_ADDRESS,
549 16, 1,
550 phys_to_virt(
551 buffer_info->page_dma +
552 buffer_info->page_offset),
553 PAGE_SIZE/2, true);
559 exit:
560 return;
565 * igb_read_clock - read raw cycle counter (to be used by time counter)
567 static cycle_t igb_read_clock(const struct cyclecounter *tc)
569 struct igb_adapter *adapter =
570 container_of(tc, struct igb_adapter, cycles);
571 struct e1000_hw *hw = &adapter->hw;
572 u64 stamp = 0;
573 int shift = 0;
576 * The timestamp latches on the lowest register read. For the 82580
577 * the lowest register is SYSTIMR instead of SYSTIML. However, we never
578 * adjusted TIMINCA, so SYSTIMR always reads as 0 and can be ignored.
580 if (hw->mac.type >= e1000_82580) {
581 stamp = rd32(E1000_SYSTIMR) >> 8;
582 shift = IGB_82580_TSYNC_SHIFT;
585 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
586 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
587 return stamp;
591 * igb_get_hw_dev - return device
592 * used by hardware layer to print debugging information
594 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
596 struct igb_adapter *adapter = hw->back;
597 return adapter->netdev;
601 * igb_init_module - Driver Registration Routine
603 * igb_init_module is the first routine called when the driver is
604 * loaded. All it does is register with the PCI subsystem.
606 static int __init igb_init_module(void)
608 int ret;
609 pr_info("%s - version %s\n",
610 igb_driver_string, igb_driver_version);
612 pr_info("%s\n", igb_copyright);
614 #ifdef CONFIG_IGB_DCA
615 dca_register_notify(&dca_notifier);
616 #endif
617 ret = pci_register_driver(&igb_driver);
618 return ret;
621 module_init(igb_init_module);
624 * igb_exit_module - Driver Exit Cleanup Routine
626 * igb_exit_module is called just before the driver is removed
627 * from memory.
629 static void __exit igb_exit_module(void)
631 #ifdef CONFIG_IGB_DCA
632 dca_unregister_notify(&dca_notifier);
633 #endif
634 pci_unregister_driver(&igb_driver);
637 module_exit(igb_exit_module);
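/* Map queue index i to its 82576 register index when VFs are in use:
 * even queues land in rows 0-7 and odd queues in rows 8-15, i.e.
 * 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, ... matching the VF queue pairing
 * described in igb_cache_ring_register() below.
 */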
639 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
641 * igb_cache_ring_register - Descriptor ring to register mapping
642 * @adapter: board private structure to initialize
644 * Once we know the feature-set enabled for the device, we'll cache
645 * the register offset the descriptor ring is assigned to.
647 static void igb_cache_ring_register(struct igb_adapter *adapter)
649 int i = 0, j = 0;
650 u32 rbase_offset = adapter->vfs_allocated_count;
652 switch (adapter->hw.mac.type) {
653 case e1000_82576:
654 /* The queues are allocated for virtualization such that VF 0
655 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
656 * In order to avoid collision we start at the first free queue
657 * and continue consuming queues in the same sequence
659 if (adapter->vfs_allocated_count) {
660 for (; i < adapter->rss_queues; i++)
661 adapter->rx_ring[i]->reg_idx = rbase_offset +
662 Q_IDX_82576(i);
664 case e1000_82575:
665 case e1000_82580:
666 case e1000_i350:
667 default:
668 for (; i < adapter->num_rx_queues; i++)
669 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
670 for (; j < adapter->num_tx_queues; j++)
671 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
672 break;
676 static void igb_free_queues(struct igb_adapter *adapter)
678 int i;
680 for (i = 0; i < adapter->num_tx_queues; i++) {
681 kfree(adapter->tx_ring[i]);
682 adapter->tx_ring[i] = NULL;
684 for (i = 0; i < adapter->num_rx_queues; i++) {
685 kfree(adapter->rx_ring[i]);
686 adapter->rx_ring[i] = NULL;
688 adapter->num_rx_queues = 0;
689 adapter->num_tx_queues = 0;
693 * igb_alloc_queues - Allocate memory for all rings
694 * @adapter: board private structure to initialize
696 * We allocate one ring per queue at run-time since we don't know the
697 * number of queues at compile-time.
699 static int igb_alloc_queues(struct igb_adapter *adapter)
701 struct igb_ring *ring;
702 int i;
703 int orig_node = adapter->node;
705 for (i = 0; i < adapter->num_tx_queues; i++) {
706 if (orig_node == -1) {
707 int cur_node = next_online_node(adapter->node);
708 if (cur_node == MAX_NUMNODES)
709 cur_node = first_online_node;
710 adapter->node = cur_node;
712 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
713 adapter->node);
714 if (!ring)
715 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
716 if (!ring)
717 goto err;
718 ring->count = adapter->tx_ring_count;
719 ring->queue_index = i;
720 ring->dev = &adapter->pdev->dev;
721 ring->netdev = adapter->netdev;
722 ring->numa_node = adapter->node;
723 /* For 82575, context index must be unique per ring. */
724 if (adapter->hw.mac.type == e1000_82575)
725 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
726 adapter->tx_ring[i] = ring;
728 /* Restore the adapter's original node */
729 adapter->node = orig_node;
731 for (i = 0; i < adapter->num_rx_queues; i++) {
732 if (orig_node == -1) {
733 int cur_node = next_online_node(adapter->node);
734 if (cur_node == MAX_NUMNODES)
735 cur_node = first_online_node;
736 adapter->node = cur_node;
738 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
739 adapter->node);
740 if (!ring)
741 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
742 if (!ring)
743 goto err;
744 ring->count = adapter->rx_ring_count;
745 ring->queue_index = i;
746 ring->dev = &adapter->pdev->dev;
747 ring->netdev = adapter->netdev;
748 ring->numa_node = adapter->node;
749 /* set flag indicating ring supports SCTP checksum offload */
750 if (adapter->hw.mac.type >= e1000_82576)
751 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
753 /* On i350, loopback VLAN packets have the tag byte-swapped. */
754 if (adapter->hw.mac.type == e1000_i350)
755 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
757 adapter->rx_ring[i] = ring;
759 /* Restore the adapter's original node */
760 adapter->node = orig_node;
762 igb_cache_ring_register(adapter);
764 return 0;
766 err:
767 /* Restore the adapter's original node */
768 adapter->node = orig_node;
769 igb_free_queues(adapter);
771 return -ENOMEM;
775 * igb_write_ivar - configure ivar for given MSI-X vector
776 * @hw: pointer to the HW structure
777 * @msix_vector: vector number we are allocating to a given ring
778 * @index: row index of IVAR register to write within IVAR table
779 * @offset: column offset of in IVAR, should be multiple of 8
781 * This function is intended to handle the writing of the IVAR register
782 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
783 * each containing a cause allocation for an Rx and Tx ring, and a
784 * variable number of rows depending on the number of queues supported.
786 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
787 int index, int offset)
789 u32 ivar = array_rd32(E1000_IVAR0, index);
791 /* clear any bits that are currently set */
792 ivar &= ~((u32)0xFF << offset);
794 /* write vector and valid bit */
795 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
797 array_wr32(E1000_IVAR0, index, ivar);
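/* Sentinel meaning no Rx or Tx ring of the given type is assigned to the vector */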
800 #define IGB_N0_QUEUE -1
801 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
803 struct igb_adapter *adapter = q_vector->adapter;
804 struct e1000_hw *hw = &adapter->hw;
805 int rx_queue = IGB_N0_QUEUE;
806 int tx_queue = IGB_N0_QUEUE;
807 u32 msixbm = 0;
809 if (q_vector->rx.ring)
810 rx_queue = q_vector->rx.ring->reg_idx;
811 if (q_vector->tx.ring)
812 tx_queue = q_vector->tx.ring->reg_idx;
814 switch (hw->mac.type) {
815 case e1000_82575:
816 /* The 82575 assigns vectors using a bitmask, which matches the
817 bitmask for the EICR/EIMS/EIMC registers. To assign one
818 or more queues to a vector, we write the appropriate bits
819 into the MSIXBM register for that vector. */
820 if (rx_queue > IGB_N0_QUEUE)
821 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
822 if (tx_queue > IGB_N0_QUEUE)
823 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
824 if (!adapter->msix_entries && msix_vector == 0)
825 msixbm |= E1000_EIMS_OTHER;
826 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
827 q_vector->eims_value = msixbm;
828 break;
829 case e1000_82576:
831 * 82576 uses a table that essentially consists of 2 columns
832 * with 8 rows. The ordering is column-major so we use the
833 * lower 3 bits as the row index, and the 4th bit as the
834 * column offset.
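 * For example, Rx queue 10 maps to row 2 with column offset 16,
 * while Tx queue 10 maps to row 2 with column offset 24.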
836 if (rx_queue > IGB_N0_QUEUE)
837 igb_write_ivar(hw, msix_vector,
838 rx_queue & 0x7,
839 (rx_queue & 0x8) << 1);
840 if (tx_queue > IGB_N0_QUEUE)
841 igb_write_ivar(hw, msix_vector,
842 tx_queue & 0x7,
843 ((tx_queue & 0x8) << 1) + 8);
844 q_vector->eims_value = 1 << msix_vector;
845 break;
846 case e1000_82580:
847 case e1000_i350:
849 * On 82580 and newer adapters the scheme is similar to 82576,
850 * except that the table is ordered row-major rather than
851 * column-major. So we traverse the table by using
852 * bit 0 as the column offset, and the remaining bits as the
853 * row index.
855 if (rx_queue > IGB_N0_QUEUE)
856 igb_write_ivar(hw, msix_vector,
857 rx_queue >> 1,
858 (rx_queue & 0x1) << 4);
859 if (tx_queue > IGB_N0_QUEUE)
860 igb_write_ivar(hw, msix_vector,
861 tx_queue >> 1,
862 ((tx_queue & 0x1) << 4) + 8);
863 q_vector->eims_value = 1 << msix_vector;
864 break;
865 default:
866 BUG();
867 break;
870 /* add q_vector eims value to global eims_enable_mask */
871 adapter->eims_enable_mask |= q_vector->eims_value;
873 /* configure q_vector to set itr on first interrupt */
874 q_vector->set_itr = 1;
878 * igb_configure_msix - Configure MSI-X hardware
880 * igb_configure_msix sets up the hardware to properly
881 * generate MSI-X interrupts.
883 static void igb_configure_msix(struct igb_adapter *adapter)
885 u32 tmp;
886 int i, vector = 0;
887 struct e1000_hw *hw = &adapter->hw;
889 adapter->eims_enable_mask = 0;
891 /* set vector for other causes, i.e. link changes */
892 switch (hw->mac.type) {
893 case e1000_82575:
894 tmp = rd32(E1000_CTRL_EXT);
895 /* enable MSI-X PBA support*/
896 tmp |= E1000_CTRL_EXT_PBA_CLR;
898 /* Auto-Mask interrupts upon ICR read. */
899 tmp |= E1000_CTRL_EXT_EIAME;
900 tmp |= E1000_CTRL_EXT_IRCA;
902 wr32(E1000_CTRL_EXT, tmp);
904 /* enable msix_other interrupt */
905 array_wr32(E1000_MSIXBM(0), vector++,
906 E1000_EIMS_OTHER);
907 adapter->eims_other = E1000_EIMS_OTHER;
909 break;
911 case e1000_82576:
912 case e1000_82580:
913 case e1000_i350:
914 /* Turn on MSI-X capability first, or our settings
915 * won't stick. And it will take days to debug. */
916 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
917 E1000_GPIE_PBA | E1000_GPIE_EIAME |
918 E1000_GPIE_NSICR);
920 /* enable msix_other interrupt */
921 adapter->eims_other = 1 << vector;
922 tmp = (vector++ | E1000_IVAR_VALID) << 8;
924 wr32(E1000_IVAR_MISC, tmp);
925 break;
926 default:
927 /* do nothing, since nothing else supports MSI-X */
928 break;
929 } /* switch (hw->mac.type) */
931 adapter->eims_enable_mask |= adapter->eims_other;
933 for (i = 0; i < adapter->num_q_vectors; i++)
934 igb_assign_vector(adapter->q_vector[i], vector++);
936 wrfl();
940 * igb_request_msix - Initialize MSI-X interrupts
942 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
943 * kernel.
945 static int igb_request_msix(struct igb_adapter *adapter)
947 struct net_device *netdev = adapter->netdev;
948 struct e1000_hw *hw = &adapter->hw;
949 int i, err = 0, vector = 0;
951 err = request_irq(adapter->msix_entries[vector].vector,
952 igb_msix_other, 0, netdev->name, adapter);
953 if (err)
954 goto out;
955 vector++;
957 for (i = 0; i < adapter->num_q_vectors; i++) {
958 struct igb_q_vector *q_vector = adapter->q_vector[i];
960 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
962 if (q_vector->rx.ring && q_vector->tx.ring)
963 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
964 q_vector->rx.ring->queue_index);
965 else if (q_vector->tx.ring)
966 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
967 q_vector->tx.ring->queue_index);
968 else if (q_vector->rx.ring)
969 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
970 q_vector->rx.ring->queue_index);
971 else
972 sprintf(q_vector->name, "%s-unused", netdev->name);
974 err = request_irq(adapter->msix_entries[vector].vector,
975 igb_msix_ring, 0, q_vector->name,
976 q_vector);
977 if (err)
978 goto out;
979 vector++;
982 igb_configure_msix(adapter);
983 return 0;
984 out:
985 return err;
988 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
990 if (adapter->msix_entries) {
991 pci_disable_msix(adapter->pdev);
992 kfree(adapter->msix_entries);
993 adapter->msix_entries = NULL;
994 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
995 pci_disable_msi(adapter->pdev);
1000 * igb_free_q_vectors - Free memory allocated for interrupt vectors
1001 * @adapter: board private structure to initialize
1003 * This function frees the memory allocated to the q_vectors. In addition if
1004 * NAPI is enabled it will delete any references to the NAPI struct prior
1005 * to freeing the q_vector.
1007 static void igb_free_q_vectors(struct igb_adapter *adapter)
1009 int v_idx;
1011 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1012 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1013 adapter->q_vector[v_idx] = NULL;
1014 if (!q_vector)
1015 continue;
1016 netif_napi_del(&q_vector->napi);
1017 kfree(q_vector);
1019 adapter->num_q_vectors = 0;
1023 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1025 * This function resets the device so that it has 0 rx queues, tx queues, and
1026 * MSI-X interrupts allocated.
1028 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1030 igb_free_queues(adapter);
1031 igb_free_q_vectors(adapter);
1032 igb_reset_interrupt_capability(adapter);
1036 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1038 * Attempt to configure interrupts using the best available
1039 * capabilities of the hardware and kernel.
1041 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1043 int err;
1044 int numvecs, i;
1046 /* Number of supported queues. */
1047 adapter->num_rx_queues = adapter->rss_queues;
1048 if (adapter->vfs_allocated_count)
1049 adapter->num_tx_queues = 1;
1050 else
1051 adapter->num_tx_queues = adapter->rss_queues;
1053 /* start with one vector for every rx queue */
1054 numvecs = adapter->num_rx_queues;
1056 /* if tx handler is separate add 1 for every tx queue */
1057 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1058 numvecs += adapter->num_tx_queues;
1060 /* store the number of vectors reserved for queues */
1061 adapter->num_q_vectors = numvecs;
1063 /* add 1 vector for link status interrupts */
1064 numvecs++;
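/* e.g. 4 RSS queues with queue pairs enabled need 4 queue vectors
 * plus this link/other-causes vector, i.e. 5 MSI-X entries in total.
 */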
1065 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1066 GFP_KERNEL);
1067 if (!adapter->msix_entries)
1068 goto msi_only;
1070 for (i = 0; i < numvecs; i++)
1071 adapter->msix_entries[i].entry = i;
1073 err = pci_enable_msix(adapter->pdev,
1074 adapter->msix_entries,
1075 numvecs);
1076 if (err == 0)
1077 goto out;
1079 igb_reset_interrupt_capability(adapter);
1081 /* If we can't do MSI-X, try MSI */
1082 msi_only:
1083 #ifdef CONFIG_PCI_IOV
1084 /* disable SR-IOV for non MSI-X configurations */
1085 if (adapter->vf_data) {
1086 struct e1000_hw *hw = &adapter->hw;
1087 /* disable iov and allow time for transactions to clear */
1088 pci_disable_sriov(adapter->pdev);
1089 msleep(500);
1091 kfree(adapter->vf_data);
1092 adapter->vf_data = NULL;
1093 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1094 wrfl();
1095 msleep(100);
1096 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1098 #endif
1099 adapter->vfs_allocated_count = 0;
1100 adapter->rss_queues = 1;
1101 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1102 adapter->num_rx_queues = 1;
1103 adapter->num_tx_queues = 1;
1104 adapter->num_q_vectors = 1;
1105 if (!pci_enable_msi(adapter->pdev))
1106 adapter->flags |= IGB_FLAG_HAS_MSI;
1107 out:
1108 /* Notify the stack of the (possibly) reduced queue counts. */
1109 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1110 return netif_set_real_num_rx_queues(adapter->netdev,
1111 adapter->num_rx_queues);
1115 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1116 * @adapter: board private structure to initialize
1118 * We allocate one q_vector per queue interrupt. If allocation fails we
1119 * return -ENOMEM.
1121 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1123 struct igb_q_vector *q_vector;
1124 struct e1000_hw *hw = &adapter->hw;
1125 int v_idx;
1126 int orig_node = adapter->node;
1128 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1129 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1130 adapter->num_tx_queues)) &&
1131 (adapter->num_rx_queues == v_idx))
1132 adapter->node = orig_node;
1133 if (orig_node == -1) {
1134 int cur_node = next_online_node(adapter->node);
1135 if (cur_node == MAX_NUMNODES)
1136 cur_node = first_online_node;
1137 adapter->node = cur_node;
1139 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1140 adapter->node);
1141 if (!q_vector)
1142 q_vector = kzalloc(sizeof(struct igb_q_vector),
1143 GFP_KERNEL);
1144 if (!q_vector)
1145 goto err_out;
1146 q_vector->adapter = adapter;
1147 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1148 q_vector->itr_val = IGB_START_ITR;
1149 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1150 adapter->q_vector[v_idx] = q_vector;
1152 /* Restore the adapter's original node */
1153 adapter->node = orig_node;
1155 return 0;
1157 err_out:
1158 /* Restore the adapter's original node */
1159 adapter->node = orig_node;
1160 igb_free_q_vectors(adapter);
1161 return -ENOMEM;
1164 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1165 int ring_idx, int v_idx)
1167 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1169 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1170 q_vector->rx.ring->q_vector = q_vector;
1171 q_vector->rx.count++;
1172 q_vector->itr_val = adapter->rx_itr_setting;
1173 if (q_vector->itr_val && q_vector->itr_val <= 3)
1174 q_vector->itr_val = IGB_START_ITR;
1177 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1178 int ring_idx, int v_idx)
1180 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1182 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1183 q_vector->tx.ring->q_vector = q_vector;
1184 q_vector->tx.count++;
1185 q_vector->itr_val = adapter->tx_itr_setting;
1186 q_vector->tx.work_limit = adapter->tx_work_limit;
1187 if (q_vector->itr_val && q_vector->itr_val <= 3)
1188 q_vector->itr_val = IGB_START_ITR;
1192 * igb_map_ring_to_vector - maps allocated queues to vectors
1194 * This function maps the recently allocated queues to vectors.
1196 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1198 int i;
1199 int v_idx = 0;
1201 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1202 (adapter->num_q_vectors < adapter->num_tx_queues))
1203 return -ENOMEM;
1205 if (adapter->num_q_vectors >=
1206 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1207 for (i = 0; i < adapter->num_rx_queues; i++)
1208 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1209 for (i = 0; i < adapter->num_tx_queues; i++)
1210 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1211 } else {
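/* Fewer vectors than rings: let Tx ring i share a vector with
 * Rx ring i, then give any remaining Tx rings their own vectors.
 */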
1212 for (i = 0; i < adapter->num_rx_queues; i++) {
1213 if (i < adapter->num_tx_queues)
1214 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1215 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1217 for (; i < adapter->num_tx_queues; i++)
1218 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1220 return 0;
1224 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1226 * This function initializes the interrupts and allocates all of the queues.
1228 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1230 struct pci_dev *pdev = adapter->pdev;
1231 int err;
1233 err = igb_set_interrupt_capability(adapter);
1234 if (err)
1235 return err;
1237 err = igb_alloc_q_vectors(adapter);
1238 if (err) {
1239 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1240 goto err_alloc_q_vectors;
1243 err = igb_alloc_queues(adapter);
1244 if (err) {
1245 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1246 goto err_alloc_queues;
1249 err = igb_map_ring_to_vector(adapter);
1250 if (err) {
1251 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1252 goto err_map_queues;
1256 return 0;
1257 err_map_queues:
1258 igb_free_queues(adapter);
1259 err_alloc_queues:
1260 igb_free_q_vectors(adapter);
1261 err_alloc_q_vectors:
1262 igb_reset_interrupt_capability(adapter);
1263 return err;
1267 * igb_request_irq - initialize interrupts
1269 * Attempts to configure interrupts using the best available
1270 * capabilities of the hardware and kernel.
1272 static int igb_request_irq(struct igb_adapter *adapter)
1274 struct net_device *netdev = adapter->netdev;
1275 struct pci_dev *pdev = adapter->pdev;
1276 int err = 0;
1278 if (adapter->msix_entries) {
1279 err = igb_request_msix(adapter);
1280 if (!err)
1281 goto request_done;
1282 /* fall back to MSI */
1283 igb_clear_interrupt_scheme(adapter);
1284 if (!pci_enable_msi(pdev))
1285 adapter->flags |= IGB_FLAG_HAS_MSI;
1286 igb_free_all_tx_resources(adapter);
1287 igb_free_all_rx_resources(adapter);
1288 adapter->num_tx_queues = 1;
1289 adapter->num_rx_queues = 1;
1290 adapter->num_q_vectors = 1;
1291 err = igb_alloc_q_vectors(adapter);
1292 if (err) {
1293 dev_err(&pdev->dev,
1294 "Unable to allocate memory for vectors\n");
1295 goto request_done;
1297 err = igb_alloc_queues(adapter);
1298 if (err) {
1299 dev_err(&pdev->dev,
1300 "Unable to allocate memory for queues\n");
1301 igb_free_q_vectors(adapter);
1302 goto request_done;
1304 igb_setup_all_tx_resources(adapter);
1305 igb_setup_all_rx_resources(adapter);
1308 igb_assign_vector(adapter->q_vector[0], 0);
1310 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1311 err = request_irq(pdev->irq, igb_intr_msi, 0,
1312 netdev->name, adapter);
1313 if (!err)
1314 goto request_done;
1316 /* fall back to legacy interrupts */
1317 igb_reset_interrupt_capability(adapter);
1318 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1321 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1322 netdev->name, adapter);
1324 if (err)
1325 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1326 err);
1328 request_done:
1329 return err;
1332 static void igb_free_irq(struct igb_adapter *adapter)
1334 if (adapter->msix_entries) {
1335 int vector = 0, i;
1337 free_irq(adapter->msix_entries[vector++].vector, adapter);
1339 for (i = 0; i < adapter->num_q_vectors; i++)
1340 free_irq(adapter->msix_entries[vector++].vector,
1341 adapter->q_vector[i]);
1342 } else {
1343 free_irq(adapter->pdev->irq, adapter);
1348 * igb_irq_disable - Mask off interrupt generation on the NIC
1349 * @adapter: board private structure
1351 static void igb_irq_disable(struct igb_adapter *adapter)
1353 struct e1000_hw *hw = &adapter->hw;
1356 * we need to be careful when disabling interrupts. The VFs are also
1357 * mapped into these registers, and clearing their bits can cause
1358 * issues for the VF drivers, so we only clear what we set.
1360 if (adapter->msix_entries) {
1361 u32 regval = rd32(E1000_EIAM);
1362 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1363 wr32(E1000_EIMC, adapter->eims_enable_mask);
1364 regval = rd32(E1000_EIAC);
1365 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1368 wr32(E1000_IAM, 0);
1369 wr32(E1000_IMC, ~0);
1370 wrfl();
1371 if (adapter->msix_entries) {
1372 int i;
1373 for (i = 0; i < adapter->num_q_vectors; i++)
1374 synchronize_irq(adapter->msix_entries[i].vector);
1375 } else {
1376 synchronize_irq(adapter->pdev->irq);
1381 * igb_irq_enable - Enable default interrupt generation settings
1382 * @adapter: board private structure
1384 static void igb_irq_enable(struct igb_adapter *adapter)
1386 struct e1000_hw *hw = &adapter->hw;
1388 if (adapter->msix_entries) {
1389 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1390 u32 regval = rd32(E1000_EIAC);
1391 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1392 regval = rd32(E1000_EIAM);
1393 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1394 wr32(E1000_EIMS, adapter->eims_enable_mask);
1395 if (adapter->vfs_allocated_count) {
1396 wr32(E1000_MBVFIMR, 0xFF);
1397 ims |= E1000_IMS_VMMB;
1399 wr32(E1000_IMS, ims);
1400 } else {
1401 wr32(E1000_IMS, IMS_ENABLE_MASK |
1402 E1000_IMS_DRSTA);
1403 wr32(E1000_IAM, IMS_ENABLE_MASK |
1404 E1000_IMS_DRSTA);
1408 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1410 struct e1000_hw *hw = &adapter->hw;
1411 u16 vid = adapter->hw.mng_cookie.vlan_id;
1412 u16 old_vid = adapter->mng_vlan_id;
1414 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1415 /* add VID to filter table */
1416 igb_vfta_set(hw, vid, true);
1417 adapter->mng_vlan_id = vid;
1418 } else {
1419 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1422 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1423 (vid != old_vid) &&
1424 !test_bit(old_vid, adapter->active_vlans)) {
1425 /* remove VID from filter table */
1426 igb_vfta_set(hw, old_vid, false);
1431 * igb_release_hw_control - release control of the h/w to f/w
1432 * @adapter: address of board private structure
1434 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1435 * For ASF and Pass Through versions of f/w this means that the
1436 * driver is no longer loaded.
1439 static void igb_release_hw_control(struct igb_adapter *adapter)
1441 struct e1000_hw *hw = &adapter->hw;
1442 u32 ctrl_ext;
1444 /* Let firmware take over control of h/w */
1445 ctrl_ext = rd32(E1000_CTRL_EXT);
1446 wr32(E1000_CTRL_EXT,
1447 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1451 * igb_get_hw_control - get control of the h/w from f/w
1452 * @adapter: address of board private structure
1454 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1455 * For ASF and Pass Through versions of f/w this means that
1456 * the driver is loaded.
1459 static void igb_get_hw_control(struct igb_adapter *adapter)
1461 struct e1000_hw *hw = &adapter->hw;
1462 u32 ctrl_ext;
1464 /* Let firmware know the driver has taken over */
1465 ctrl_ext = rd32(E1000_CTRL_EXT);
1466 wr32(E1000_CTRL_EXT,
1467 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1471 * igb_configure - configure the hardware for RX and TX
1472 * @adapter: private board structure
1474 static void igb_configure(struct igb_adapter *adapter)
1476 struct net_device *netdev = adapter->netdev;
1477 int i;
1479 igb_get_hw_control(adapter);
1480 igb_set_rx_mode(netdev);
1482 igb_restore_vlan(adapter);
1484 igb_setup_tctl(adapter);
1485 igb_setup_mrqc(adapter);
1486 igb_setup_rctl(adapter);
1488 igb_configure_tx(adapter);
1489 igb_configure_rx(adapter);
1491 igb_rx_fifo_flush_82575(&adapter->hw);
1493 /* call igb_desc_unused which always leaves
1494 * at least 1 descriptor unused to make sure
1495 * next_to_use != next_to_clean */
1496 for (i = 0; i < adapter->num_rx_queues; i++) {
1497 struct igb_ring *ring = adapter->rx_ring[i];
1498 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1503 * igb_power_up_link - Power up the phy/serdes link
1504 * @adapter: address of board private structure
1506 void igb_power_up_link(struct igb_adapter *adapter)
1508 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1509 igb_power_up_phy_copper(&adapter->hw);
1510 else
1511 igb_power_up_serdes_link_82575(&adapter->hw);
1512 igb_reset_phy(&adapter->hw);
1516 * igb_power_down_link - Power down the phy/serdes link
1517 * @adapter: address of board private structure
1519 static void igb_power_down_link(struct igb_adapter *adapter)
1521 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1522 igb_power_down_phy_copper_82575(&adapter->hw);
1523 else
1524 igb_shutdown_serdes_link_82575(&adapter->hw);
1528 * igb_up - Open the interface and prepare it to handle traffic
1529 * @adapter: board private structure
1531 int igb_up(struct igb_adapter *adapter)
1533 struct e1000_hw *hw = &adapter->hw;
1534 int i;
1536 /* hardware has been reset, we need to reload some things */
1537 igb_configure(adapter);
1539 clear_bit(__IGB_DOWN, &adapter->state);
1541 for (i = 0; i < adapter->num_q_vectors; i++)
1542 napi_enable(&(adapter->q_vector[i]->napi));
1544 if (adapter->msix_entries)
1545 igb_configure_msix(adapter);
1546 else
1547 igb_assign_vector(adapter->q_vector[0], 0);
1549 /* Clear any pending interrupts. */
1550 rd32(E1000_ICR);
1551 igb_irq_enable(adapter);
1553 /* notify VFs that reset has been completed */
1554 if (adapter->vfs_allocated_count) {
1555 u32 reg_data = rd32(E1000_CTRL_EXT);
1556 reg_data |= E1000_CTRL_EXT_PFRSTD;
1557 wr32(E1000_CTRL_EXT, reg_data);
1560 netif_tx_start_all_queues(adapter->netdev);
1562 /* start the watchdog. */
1563 hw->mac.get_link_status = 1;
1564 schedule_work(&adapter->watchdog_task);
1566 return 0;
1569 void igb_down(struct igb_adapter *adapter)
1571 struct net_device *netdev = adapter->netdev;
1572 struct e1000_hw *hw = &adapter->hw;
1573 u32 tctl, rctl;
1574 int i;
1576 /* signal that we're down so the interrupt handler does not
1577 * reschedule our watchdog timer */
1578 set_bit(__IGB_DOWN, &adapter->state);
1580 /* disable receives in the hardware */
1581 rctl = rd32(E1000_RCTL);
1582 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1583 /* flush and sleep below */
1585 netif_tx_stop_all_queues(netdev);
1587 /* disable transmits in the hardware */
1588 tctl = rd32(E1000_TCTL);
1589 tctl &= ~E1000_TCTL_EN;
1590 wr32(E1000_TCTL, tctl);
1591 /* flush both disables and wait for them to finish */
1592 wrfl();
1593 msleep(10);
1595 for (i = 0; i < adapter->num_q_vectors; i++)
1596 napi_disable(&(adapter->q_vector[i]->napi));
1598 igb_irq_disable(adapter);
1600 del_timer_sync(&adapter->watchdog_timer);
1601 del_timer_sync(&adapter->phy_info_timer);
1603 netif_carrier_off(netdev);
1605 /* record the stats before reset*/
1606 spin_lock(&adapter->stats64_lock);
1607 igb_update_stats(adapter, &adapter->stats64);
1608 spin_unlock(&adapter->stats64_lock);
1610 adapter->link_speed = 0;
1611 adapter->link_duplex = 0;
1613 if (!pci_channel_offline(adapter->pdev))
1614 igb_reset(adapter);
1615 igb_clean_all_tx_rings(adapter);
1616 igb_clean_all_rx_rings(adapter);
1617 #ifdef CONFIG_IGB_DCA
1619 /* since we reset the hardware DCA settings were cleared */
1620 igb_setup_dca(adapter);
1621 #endif
1624 void igb_reinit_locked(struct igb_adapter *adapter)
1626 WARN_ON(in_interrupt());
1627 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1628 msleep(1);
1629 igb_down(adapter);
1630 igb_up(adapter);
1631 clear_bit(__IGB_RESETTING, &adapter->state);
1634 void igb_reset(struct igb_adapter *adapter)
1636 struct pci_dev *pdev = adapter->pdev;
1637 struct e1000_hw *hw = &adapter->hw;
1638 struct e1000_mac_info *mac = &hw->mac;
1639 struct e1000_fc_info *fc = &hw->fc;
1640 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1641 u16 hwm;
1643 /* Repartition Pba for greater than 9k mtu
1644 * To take effect CTRL.RST is required.
1646 switch (mac->type) {
1647 case e1000_i350:
1648 case e1000_82580:
1649 pba = rd32(E1000_RXPBS);
1650 pba = igb_rxpbs_adjust_82580(pba);
1651 break;
1652 case e1000_82576:
1653 pba = rd32(E1000_RXPBS);
1654 pba &= E1000_RXPBS_SIZE_MASK_82576;
1655 break;
1656 case e1000_82575:
1657 default:
1658 pba = E1000_PBA_34K;
1659 break;
1662 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1663 (mac->type < e1000_82576)) {
1664 /* adjust PBA for jumbo frames */
1665 wr32(E1000_PBA, pba);
1667 /* To maintain wire speed transmits, the Tx FIFO should be
1668 * large enough to accommodate two full transmit packets,
1669 * rounded up to the next 1KB and expressed in KB. Likewise,
1670 * the Rx FIFO should be large enough to accommodate at least
1671 * one full receive packet and is similarly rounded up and
1672 * expressed in KB. */
1673 pba = rd32(E1000_PBA);
1674 /* upper 16 bits has Tx packet buffer allocation size in KB */
1675 tx_space = pba >> 16;
1676 /* lower 16 bits has Rx packet buffer allocation size in KB */
1677 pba &= 0xffff;
1678 /* the tx fifo also stores 16 bytes of information about the tx
1679 * but don't include ethernet FCS because hardware appends it */
1680 min_tx_space = (adapter->max_frame_size +
1681 sizeof(union e1000_adv_tx_desc) -
1682 ETH_FCS_LEN) * 2;
1683 min_tx_space = ALIGN(min_tx_space, 1024);
1684 min_tx_space >>= 10;
1685 /* software strips receive CRC, so leave room for it */
1686 min_rx_space = adapter->max_frame_size;
1687 min_rx_space = ALIGN(min_rx_space, 1024);
1688 min_rx_space >>= 10;
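/* e.g. a 9018-byte jumbo frame works out to an 18 KB minimum Tx FIFO
 * and a 9 KB minimum Rx FIFO with the rounding above.
 */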
1690 /* If current Tx allocation is less than the min Tx FIFO size,
1691 * and the min Tx FIFO size is less than the current Rx FIFO
1692 * allocation, take space away from current Rx allocation */
1693 if (tx_space < min_tx_space &&
1694 ((min_tx_space - tx_space) < pba)) {
1695 pba = pba - (min_tx_space - tx_space);
1697 /* if short on rx space, rx wins and must trump tx
1698 * adjustment */
1699 if (pba < min_rx_space)
1700 pba = min_rx_space;
1702 wr32(E1000_PBA, pba);
1705 /* flow control settings */
1706 /* The high water mark must be low enough to fit one full frame
1707 * (or the size used for early receive) above it in the Rx FIFO.
1708 * Set it to the lower of:
1709 * - 90% of the Rx FIFO size, or
1710 * - the full Rx FIFO size minus one full frame */
1711 hwm = min(((pba << 10) * 9 / 10),
1712 ((pba << 10) - 2 * adapter->max_frame_size));
1714 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1715 fc->low_water = fc->high_water - 16;
1716 fc->pause_time = 0xFFFF;
1717 fc->send_xon = 1;
1718 fc->current_mode = fc->requested_mode;
1720 /* disable receive for all VFs and wait one second */
1721 if (adapter->vfs_allocated_count) {
1722 int i;
1723 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1724 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1726 /* ping all the active vfs to let them know we are going down */
1727 igb_ping_all_vfs(adapter);
1729 /* disable transmits and receives */
1730 wr32(E1000_VFRE, 0);
1731 wr32(E1000_VFTE, 0);
1734 /* Allow time for pending master requests to run */
1735 hw->mac.ops.reset_hw(hw);
1736 wr32(E1000_WUC, 0);
1738 if (hw->mac.ops.init_hw(hw))
1739 dev_err(&pdev->dev, "Hardware Error\n");
1741 igb_init_dmac(adapter, pba);
1742 if (!netif_running(adapter->netdev))
1743 igb_power_down_link(adapter);
1745 igb_update_mng_vlan(adapter);
1747 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1750 igb_get_phy_info(hw);
1753 static netdev_features_t igb_fix_features(struct net_device *netdev,
1754 netdev_features_t features)
1757 * Since there is no support for separate Rx/Tx VLAN accel
1758 * enable/disable, make sure the Tx flag is always in the same state as Rx.
1760 if (features & NETIF_F_HW_VLAN_RX)
1761 features |= NETIF_F_HW_VLAN_TX;
1762 else
1763 features &= ~NETIF_F_HW_VLAN_TX;
1765 return features;
1768 static int igb_set_features(struct net_device *netdev,
1769 netdev_features_t features)
1771 netdev_features_t changed = netdev->features ^ features;
1773 if (changed & NETIF_F_HW_VLAN_RX)
1774 igb_vlan_mode(netdev, features);
1776 return 0;
1779 static const struct net_device_ops igb_netdev_ops = {
1780 .ndo_open = igb_open,
1781 .ndo_stop = igb_close,
1782 .ndo_start_xmit = igb_xmit_frame,
1783 .ndo_get_stats64 = igb_get_stats64,
1784 .ndo_set_rx_mode = igb_set_rx_mode,
1785 .ndo_set_mac_address = igb_set_mac,
1786 .ndo_change_mtu = igb_change_mtu,
1787 .ndo_do_ioctl = igb_ioctl,
1788 .ndo_tx_timeout = igb_tx_timeout,
1789 .ndo_validate_addr = eth_validate_addr,
1790 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1791 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1792 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1793 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1794 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1795 .ndo_get_vf_config = igb_ndo_get_vf_config,
1796 #ifdef CONFIG_NET_POLL_CONTROLLER
1797 .ndo_poll_controller = igb_netpoll,
1798 #endif
1799 .ndo_fix_features = igb_fix_features,
1800 .ndo_set_features = igb_set_features,
1804 * igb_probe - Device Initialization Routine
1805 * @pdev: PCI device information struct
1806 * @ent: entry in igb_pci_tbl
1808 * Returns 0 on success, negative on failure
1810 * igb_probe initializes an adapter identified by a pci_dev structure.
1811 * The OS initialization, configuring of the adapter private structure,
1812 * and a hardware reset occur.
1814 static int __devinit igb_probe(struct pci_dev *pdev,
1815 const struct pci_device_id *ent)
1817 struct net_device *netdev;
1818 struct igb_adapter *adapter;
1819 struct e1000_hw *hw;
1820 u16 eeprom_data = 0;
1821 s32 ret_val;
1822 static int global_quad_port_a; /* global quad port a indication */
1823 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1824 unsigned long mmio_start, mmio_len;
1825 int err, pci_using_dac;
1826 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1827 u8 part_str[E1000_PBANUM_LENGTH];
1829 /* Catch broken hardware that put the wrong VF device ID in
1830 * the PCIe SR-IOV capability.
1832 if (pdev->is_virtfn) {
1833 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1834 pci_name(pdev), pdev->vendor, pdev->device);
1835 return -EINVAL;
1838 err = pci_enable_device_mem(pdev);
1839 if (err)
1840 return err;
1842 pci_using_dac = 0;
1843 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1844 if (!err) {
1845 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1846 if (!err)
1847 pci_using_dac = 1;
1848 } else {
1849 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1850 if (err) {
1851 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1852 if (err) {
1853 dev_err(&pdev->dev, "No usable DMA "
1854 "configuration, aborting\n");
1855 goto err_dma;
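/* A note on the fallback above: the driver first asks for a 64-bit DMA
 * mask (and matching coherent mask) so rings and buffers can sit anywhere
 * in memory, and only drops back to 32-bit addressing if the platform
 * refuses.  pci_using_dac simply records which path was taken so that
 * NETIF_F_HIGHDMA can be advertised further down in probe.
 */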
1860 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1861 IORESOURCE_MEM),
1862 igb_driver_name);
1863 if (err)
1864 goto err_pci_reg;
1866 pci_enable_pcie_error_reporting(pdev);
1868 pci_set_master(pdev);
1869 pci_save_state(pdev);
1871 err = -ENOMEM;
1872 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1873 IGB_MAX_TX_QUEUES);
1874 if (!netdev)
1875 goto err_alloc_etherdev;
1877 SET_NETDEV_DEV(netdev, &pdev->dev);
1879 pci_set_drvdata(pdev, netdev);
1880 adapter = netdev_priv(netdev);
1881 adapter->netdev = netdev;
1882 adapter->pdev = pdev;
1883 hw = &adapter->hw;
1884 hw->back = adapter;
1885 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1887 mmio_start = pci_resource_start(pdev, 0);
1888 mmio_len = pci_resource_len(pdev, 0);
1890 err = -EIO;
1891 hw->hw_addr = ioremap(mmio_start, mmio_len);
1892 if (!hw->hw_addr)
1893 goto err_ioremap;
1895 netdev->netdev_ops = &igb_netdev_ops;
1896 igb_set_ethtool_ops(netdev);
1897 netdev->watchdog_timeo = 5 * HZ;
1899 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1901 netdev->mem_start = mmio_start;
1902 netdev->mem_end = mmio_start + mmio_len;
1904 /* PCI config space info */
1905 hw->vendor_id = pdev->vendor;
1906 hw->device_id = pdev->device;
1907 hw->revision_id = pdev->revision;
1908 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1909 hw->subsystem_device_id = pdev->subsystem_device;
1911 /* Copy the default MAC, PHY and NVM function pointers */
1912 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1913 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1914 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1915 /* Initialize skew-specific constants */
1916 err = ei->get_invariants(hw);
1917 if (err)
1918 goto err_sw_init;
1920 /* setup the private structure */
1921 err = igb_sw_init(adapter);
1922 if (err)
1923 goto err_sw_init;
1925 igb_get_bus_info_pcie(hw);
1927 hw->phy.autoneg_wait_to_complete = false;
1929 /* Copper options */
1930 if (hw->phy.media_type == e1000_media_type_copper) {
1931 hw->phy.mdix = AUTO_ALL_MODES;
1932 hw->phy.disable_polarity_correction = false;
1933 hw->phy.ms_type = e1000_ms_hw_default;
1936 if (igb_check_reset_block(hw))
1937 dev_info(&pdev->dev,
1938 "PHY reset is blocked due to SOL/IDER session.\n");
1941 * Although features is initialized to 0 during allocation, it might
1942 * already have bits set by igb_sw_init, so we should use an OR
1943 * instead of an assignment.
1945 netdev->features |= NETIF_F_SG |
1946 NETIF_F_IP_CSUM |
1947 NETIF_F_IPV6_CSUM |
1948 NETIF_F_TSO |
1949 NETIF_F_TSO6 |
1950 NETIF_F_RXHASH |
1951 NETIF_F_RXCSUM |
1952 NETIF_F_HW_VLAN_RX |
1953 NETIF_F_HW_VLAN_TX;
1955 /* copy netdev features into list of user selectable features */
1956 netdev->hw_features |= netdev->features;
1958 /* set this bit last since it cannot be part of hw_features */
1959 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1961 netdev->vlan_features |= NETIF_F_TSO |
1962 NETIF_F_TSO6 |
1963 NETIF_F_IP_CSUM |
1964 NETIF_F_IPV6_CSUM |
1965 NETIF_F_SG;
1967 if (pci_using_dac) {
1968 netdev->features |= NETIF_F_HIGHDMA;
1969 netdev->vlan_features |= NETIF_F_HIGHDMA;
1972 if (hw->mac.type >= e1000_82576) {
1973 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1974 netdev->features |= NETIF_F_SCTP_CSUM;
1977 netdev->priv_flags |= IFF_UNICAST_FLT;
1979 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1981 /* before reading the NVM, reset the controller to put the device in a
1982 * known good starting state */
1983 hw->mac.ops.reset_hw(hw);
1985 /* make sure the NVM is good */
1986 if (hw->nvm.ops.validate(hw) < 0) {
1987 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1988 err = -EIO;
1989 goto err_eeprom;
1992 /* copy the MAC address out of the NVM */
1993 if (hw->mac.ops.read_mac_addr(hw))
1994 dev_err(&pdev->dev, "NVM Read Error\n");
1996 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1997 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1999 if (!is_valid_ether_addr(netdev->perm_addr)) {
2000 dev_err(&pdev->dev, "Invalid MAC Address\n");
2001 err = -EIO;
2002 goto err_eeprom;
2005 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2006 (unsigned long) adapter);
2007 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2008 (unsigned long) adapter);
2010 INIT_WORK(&adapter->reset_task, igb_reset_task);
2011 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2013 /* Initialize link properties that are user-changeable */
2014 adapter->fc_autoneg = true;
2015 hw->mac.autoneg = true;
2016 hw->phy.autoneg_advertised = 0x2f;
2018 hw->fc.requested_mode = e1000_fc_default;
2019 hw->fc.current_mode = e1000_fc_default;
2021 igb_validate_mdi_setting(hw);
2023 /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2024 * enable the ACPI Magic Packet filter.
2027 if (hw->bus.func == 0)
2028 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2029 else if (hw->mac.type >= e1000_82580)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2031 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2032 &eeprom_data);
2033 else if (hw->bus.func == 1)
2034 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
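/* To summarize the NVM reads above: the APM wake bit lives in one of the
 * Initialization Control 3 words, and which word applies depends on the
 * LAN function: function 0 uses the port A word, 82580 and later parts
 * index the word by their function offset, and function 1 on older parts
 * uses the port B word.  eeprom_data then gates the magic packet filter
 * setup below.
 */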
2036 if (eeprom_data & eeprom_apme_mask)
2037 adapter->eeprom_wol |= E1000_WUFC_MAG;
2039 /* now that we have the eeprom settings, apply the special cases where
2040 * the eeprom may be wrong or the board simply won't support Wake on
2041 * LAN on a particular port */
2042 switch (pdev->device) {
2043 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2044 adapter->eeprom_wol = 0;
2045 break;
2046 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2047 case E1000_DEV_ID_82576_FIBER:
2048 case E1000_DEV_ID_82576_SERDES:
2049 /* Wake events are only supported on port A for dual fiber
2050 * adapters, regardless of the eeprom setting */
2051 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2052 adapter->eeprom_wol = 0;
2053 break;
2054 case E1000_DEV_ID_82576_QUAD_COPPER:
2055 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2056 /* if quad port adapter, disable WoL on all but port A */
2057 if (global_quad_port_a != 0)
2058 adapter->eeprom_wol = 0;
2059 else
2060 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2061 /* Reset for multiple quad port adapters */
2062 if (++global_quad_port_a == 4)
2063 global_quad_port_a = 0;
2064 break;
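/* The quad-port case above relies on the static global_quad_port_a
 * counter: only the first port probed (port A) keeps its WoL capability,
 * and the counter wraps back to zero after four ports so a second
 * quad-port adapter starts its own count.
 */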
2067 /* initialize the wol settings based on the eeprom settings */
2068 adapter->wol = adapter->eeprom_wol;
2069 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2071 /* reset the hardware with the new settings */
2072 igb_reset(adapter);
2074 /* let the f/w know that the h/w is now under the control of the
2075 * driver. */
2076 igb_get_hw_control(adapter);
2078 strcpy(netdev->name, "eth%d");
2079 err = register_netdev(netdev);
2080 if (err)
2081 goto err_register;
2083 /* carrier off reporting is important to ethtool even BEFORE open */
2084 netif_carrier_off(netdev);
2086 #ifdef CONFIG_IGB_DCA
2087 if (dca_add_requester(&pdev->dev) == 0) {
2088 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2089 dev_info(&pdev->dev, "DCA enabled\n");
2090 igb_setup_dca(adapter);
2093 #endif
2094 /* do hw tstamp init after resetting */
2095 igb_init_hw_timer(adapter);
2097 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2098 /* print bus type/speed/width info */
2099 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2100 netdev->name,
2101 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2102 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2103 "unknown"),
2104 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2105 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2106 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2107 "unknown"),
2108 netdev->dev_addr);
2110 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2111 if (ret_val)
2112 strcpy(part_str, "Unknown");
2113 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2114 dev_info(&pdev->dev,
2115 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2116 adapter->msix_entries ? "MSI-X" :
2117 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2118 adapter->num_rx_queues, adapter->num_tx_queues);
2119 switch (hw->mac.type) {
2120 case e1000_i350:
2121 igb_set_eee_i350(hw);
2122 break;
2123 default:
2124 break;
2127 pm_runtime_put_noidle(&pdev->dev);
2128 return 0;
2130 err_register:
2131 igb_release_hw_control(adapter);
2132 err_eeprom:
2133 if (!igb_check_reset_block(hw))
2134 igb_reset_phy(hw);
2136 if (hw->flash_address)
2137 iounmap(hw->flash_address);
2138 err_sw_init:
2139 igb_clear_interrupt_scheme(adapter);
2140 iounmap(hw->hw_addr);
2141 err_ioremap:
2142 free_netdev(netdev);
2143 err_alloc_etherdev:
2144 pci_release_selected_regions(pdev,
2145 pci_select_bars(pdev, IORESOURCE_MEM));
2146 err_pci_reg:
2147 err_dma:
2148 pci_disable_device(pdev);
2149 return err;
2153 * igb_remove - Device Removal Routine
2154 * @pdev: PCI device information struct
2156 * igb_remove is called by the PCI subsystem to alert the driver
2157 * that it should release a PCI device.  This could be caused by a
2158 * Hot-Plug event, or because the driver is going to be removed from
2159 * memory.
2161 static void __devexit igb_remove(struct pci_dev *pdev)
2163 struct net_device *netdev = pci_get_drvdata(pdev);
2164 struct igb_adapter *adapter = netdev_priv(netdev);
2165 struct e1000_hw *hw = &adapter->hw;
2167 pm_runtime_get_noresume(&pdev->dev);
2170 * The watchdog timer may be rescheduled, so explicitly
2171 * prevent it from being rescheduled during teardown.
2173 set_bit(__IGB_DOWN, &adapter->state);
2174 del_timer_sync(&adapter->watchdog_timer);
2175 del_timer_sync(&adapter->phy_info_timer);
2177 cancel_work_sync(&adapter->reset_task);
2178 cancel_work_sync(&adapter->watchdog_task);
2180 #ifdef CONFIG_IGB_DCA
2181 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2182 dev_info(&pdev->dev, "DCA disabled\n");
2183 dca_remove_requester(&pdev->dev);
2184 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2185 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2187 #endif
2189 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2190 * would have already happened in close and is redundant. */
2191 igb_release_hw_control(adapter);
2193 unregister_netdev(netdev);
2195 igb_clear_interrupt_scheme(adapter);
2197 #ifdef CONFIG_PCI_IOV
2198 /* reclaim resources allocated to VFs */
2199 if (adapter->vf_data) {
2200 /* disable iov and allow time for transactions to clear */
2201 if (!igb_check_vf_assignment(adapter)) {
2202 pci_disable_sriov(pdev);
2203 msleep(500);
2204 } else {
2205 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2208 kfree(adapter->vf_data);
2209 adapter->vf_data = NULL;
2210 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2211 wrfl();
2212 msleep(100);
2213 dev_info(&pdev->dev, "IOV Disabled\n");
2215 #endif
2217 iounmap(hw->hw_addr);
2218 if (hw->flash_address)
2219 iounmap(hw->flash_address);
2220 pci_release_selected_regions(pdev,
2221 pci_select_bars(pdev, IORESOURCE_MEM));
2223 kfree(adapter->shadow_vfta);
2224 free_netdev(netdev);
2226 pci_disable_pcie_error_reporting(pdev);
2228 pci_disable_device(pdev);
2232 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2233 * @adapter: board private structure to initialize
2235 * This function initializes the vf specific data storage and then attempts to
2236 * allocate the VFs.  The ordering matters because it is much more
2237 * expensive time-wise to disable SR-IOV than it is to allocate and free
2238 * the memory for the VFs.
2240 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2242 #ifdef CONFIG_PCI_IOV
2243 struct pci_dev *pdev = adapter->pdev;
2244 int old_vfs = igb_find_enabled_vfs(adapter);
2245 int i;
2247 if (old_vfs) {
2248 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2249 "max_vfs setting of %d\n", old_vfs, max_vfs);
2250 adapter->vfs_allocated_count = old_vfs;
2253 if (!adapter->vfs_allocated_count)
2254 return;
2256 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2257 sizeof(struct vf_data_storage), GFP_KERNEL);
2258 /* if allocation failed then we do not support SR-IOV */
2259 if (!adapter->vf_data) {
2260 adapter->vfs_allocated_count = 0;
2261 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2262 "Data Storage\n");
2263 goto out;
2266 if (!old_vfs) {
2267 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2268 goto err_out;
2270 dev_info(&pdev->dev, "%d VFs allocated\n",
2271 adapter->vfs_allocated_count);
2272 for (i = 0; i < adapter->vfs_allocated_count; i++)
2273 igb_vf_configure(adapter, i);
2275 /* DMA Coalescing is not supported in IOV mode. */
2276 adapter->flags &= ~IGB_FLAG_DMAC;
2277 goto out;
2278 err_out:
2279 kfree(adapter->vf_data);
2280 adapter->vf_data = NULL;
2281 adapter->vfs_allocated_count = 0;
2282 out:
2283 return;
2284 #endif /* CONFIG_PCI_IOV */
2288 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2289 * @adapter: board private structure to initialize
2291 * igb_init_hw_timer initializes the function pointers and values for
2292 * the timer found in hardware.
2294 static void igb_init_hw_timer(struct igb_adapter *adapter)
2296 struct e1000_hw *hw = &adapter->hw;
2298 switch (hw->mac.type) {
2299 case e1000_i350:
2300 case e1000_82580:
2301 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2302 adapter->cycles.read = igb_read_clock;
2303 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2304 adapter->cycles.mult = 1;
2306 * The 82580 timesync advances the system timer in 8 ns increments,
2307 * and the value cannot be shifted.  Instead we need to shift
2308 * the registers to generate a 64-bit timer value.  As a result
2309 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2310 * 24 in order to generate a larger value for synchronization.
2312 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2313 /* disable system timer temporarily by setting bit 31 */
2314 wr32(E1000_TSAUXC, 0x80000000);
2315 wrfl();
2317 /* Set registers so that rollover occurs soon to test this. */
2318 wr32(E1000_SYSTIMR, 0x00000000);
2319 wr32(E1000_SYSTIML, 0x80000000);
2320 wr32(E1000_SYSTIMH, 0x000000FF);
2321 wrfl();
2323 /* enable system timer by clearing bit 31 */
2324 wr32(E1000_TSAUXC, 0x0);
2325 wrfl();
2327 timecounter_init(&adapter->clock,
2328 &adapter->cycles,
2329 ktime_to_ns(ktime_get_real()));
2331 * Synchronize our NIC clock against the system wall clock.  NIC
2332 * time stamp reading requires ~3us per sample, and each sample
2333 * proved pretty stable even under load, so only 10
2334 * samples are required for each offset comparison.
2336 memset(&adapter->compare, 0, sizeof(adapter->compare));
2337 adapter->compare.source = &adapter->clock;
2338 adapter->compare.target = ktime_get_real;
2339 adapter->compare.num_samples = 10;
2340 timecompare_update(&adapter->compare, 0);
2341 break;
2342 case e1000_82576:
2344 * Initialize hardware timer: we keep it running just in case
2345 * that some program needs it later on.
2347 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2348 adapter->cycles.read = igb_read_clock;
2349 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2350 adapter->cycles.mult = 1;
2352 * Scale the NIC clock cycle by a large factor so that
2353 * relatively small clock corrections can be added or
2354 * subtracted at each clock tick. The drawbacks of a large
2355 * factor are a) that the clock register overflows more quickly
2356 * (not such a big deal) and b) that the increment per tick has
2357 * to fit into 24 bits. As a result we need to use a shift of
2358 * 19 so we can fit a value of 16 into the TIMINCA register.
2360 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2361 wr32(E1000_TIMINCA,
2362 (1 << E1000_TIMINCA_16NS_SHIFT) |
2363 (16 << IGB_82576_TSYNC_SHIFT));
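/* Rough arithmetic behind the TIMINCA value above (assuming the 16NS
 * shift selects a 16 ns increment period, as the name suggests): SYSTIM
 * grows by (16 << 19) counter units every 16 ns, i.e. 2^19 units per
 * nanosecond, which is exactly what the cycles.mult = 1 and
 * cycles.shift = 19 timecounter conversion (ns = cycles >> 19) undoes.
 */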
2365 /* Set registers so that rollover occurs soon to test this. */
2366 wr32(E1000_SYSTIML, 0x00000000);
2367 wr32(E1000_SYSTIMH, 0xFF800000);
2368 wrfl();
2370 timecounter_init(&adapter->clock,
2371 &adapter->cycles,
2372 ktime_to_ns(ktime_get_real()));
2374 * Synchronize our NIC clock against the system wall clock.  NIC
2375 * time stamp reading requires ~3us per sample, and each sample
2376 * proved pretty stable even under load, so only 10
2377 * samples are required for each offset comparison.
2379 memset(&adapter->compare, 0, sizeof(adapter->compare));
2380 adapter->compare.source = &adapter->clock;
2381 adapter->compare.target = ktime_get_real;
2382 adapter->compare.num_samples = 10;
2383 timecompare_update(&adapter->compare, 0);
2384 break;
2385 case e1000_82575:
2386 /* 82575 does not support timesync */
2387 default:
2388 break;
2394 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2395 * @adapter: board private structure to initialize
2397 * igb_sw_init initializes the Adapter private data structure.
2398 * Fields are initialized based on PCI device information and
2399 * OS network device settings (MTU size).
2401 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2403 struct e1000_hw *hw = &adapter->hw;
2404 struct net_device *netdev = adapter->netdev;
2405 struct pci_dev *pdev = adapter->pdev;
2407 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2409 /* set default ring sizes */
2410 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2411 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2413 /* set default ITR values */
2414 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2415 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2417 /* set default work limits */
2418 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2420 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2421 VLAN_HLEN;
2422 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2424 adapter->node = -1;
2426 spin_lock_init(&adapter->stats64_lock);
2427 #ifdef CONFIG_PCI_IOV
2428 switch (hw->mac.type) {
2429 case e1000_82576:
2430 case e1000_i350:
2431 if (max_vfs > 7) {
2432 dev_warn(&pdev->dev,
2433 "Maximum of 7 VFs per PF, using max\n");
2434 adapter->vfs_allocated_count = 7;
2435 } else
2436 adapter->vfs_allocated_count = max_vfs;
2437 break;
2438 default:
2439 break;
2441 #endif /* CONFIG_PCI_IOV */
2442 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2443 /* i350 cannot do RSS and SR-IOV at the same time */
2444 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2445 adapter->rss_queues = 1;
2448 * If rss_queues > 4, or if VFs are going to be allocated while more
2449 * than one RSS queue is in use, combine the queues into queue pairs
2450 * in order to conserve interrupts, which are in limited supply.
2452 if ((adapter->rss_queues > 4) ||
2453 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2454 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
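/* Put differently: without IGB_FLAG_QUEUE_PAIRS each tx and rx ring gets
 * its own q_vector (and MSI-X vector), but the adapter only exposes a
 * limited number of vectors, so beyond four RSS queues, or when many VFs
 * are also being allocated, a tx/rx ring pair shares a single q_vector.
 */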
2456 /* Setup and initialize a copy of the hw vlan table array */
2457 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2458 E1000_VLAN_FILTER_TBL_SIZE,
2459 GFP_ATOMIC);
2461 /* This call may decrease the number of queues */
2462 if (igb_init_interrupt_scheme(adapter)) {
2463 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2464 return -ENOMEM;
2467 igb_probe_vfs(adapter);
2469 /* Explicitly disable IRQ since the NIC can be in any state. */
2470 igb_irq_disable(adapter);
2472 if (hw->mac.type == e1000_i350)
2473 adapter->flags &= ~IGB_FLAG_DMAC;
2475 set_bit(__IGB_DOWN, &adapter->state);
2476 return 0;
2480 * igb_open - Called when a network interface is made active
2481 * @netdev: network interface device structure
2483 * Returns 0 on success, negative value on failure
2485 * The open entry point is called when a network interface is made
2486 * active by the system (IFF_UP). At this point all resources needed
2487 * for transmit and receive operations are allocated, the interrupt
2488 * handler is registered with the OS, the watchdog timer is started,
2489 * and the stack is notified that the interface is ready.
2491 static int __igb_open(struct net_device *netdev, bool resuming)
2493 struct igb_adapter *adapter = netdev_priv(netdev);
2494 struct e1000_hw *hw = &adapter->hw;
2495 struct pci_dev *pdev = adapter->pdev;
2496 int err;
2497 int i;
2499 /* disallow open during test */
2500 if (test_bit(__IGB_TESTING, &adapter->state)) {
2501 WARN_ON(resuming);
2502 return -EBUSY;
2505 if (!resuming)
2506 pm_runtime_get_sync(&pdev->dev);
2508 netif_carrier_off(netdev);
2510 /* allocate transmit descriptors */
2511 err = igb_setup_all_tx_resources(adapter);
2512 if (err)
2513 goto err_setup_tx;
2515 /* allocate receive descriptors */
2516 err = igb_setup_all_rx_resources(adapter);
2517 if (err)
2518 goto err_setup_rx;
2520 igb_power_up_link(adapter);
2522 /* before we allocate an interrupt, we must be ready to handle it.
2523 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2524 * as soon as we call pci_request_irq, so we have to setup our
2525 * clean_rx handler before we do so. */
2526 igb_configure(adapter);
2528 err = igb_request_irq(adapter);
2529 if (err)
2530 goto err_req_irq;
2532 /* From here on the code is the same as igb_up() */
2533 clear_bit(__IGB_DOWN, &adapter->state);
2535 for (i = 0; i < adapter->num_q_vectors; i++)
2536 napi_enable(&(adapter->q_vector[i]->napi));
2538 /* Clear any pending interrupts. */
2539 rd32(E1000_ICR);
2541 igb_irq_enable(adapter);
2543 /* notify VFs that reset has been completed */
2544 if (adapter->vfs_allocated_count) {
2545 u32 reg_data = rd32(E1000_CTRL_EXT);
2546 reg_data |= E1000_CTRL_EXT_PFRSTD;
2547 wr32(E1000_CTRL_EXT, reg_data);
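/* E1000_CTRL_EXT_PFRSTD is the "PF reset done" indication; setting it
 * here is what the "notify VFs that reset has been completed" comment
 * above refers to, letting any active VFs resume once the PF has
 * finished bringing the interface up.
 */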
2550 netif_tx_start_all_queues(netdev);
2552 if (!resuming)
2553 pm_runtime_put(&pdev->dev);
2555 /* start the watchdog. */
2556 hw->mac.get_link_status = 1;
2557 schedule_work(&adapter->watchdog_task);
2559 return 0;
2561 err_req_irq:
2562 igb_release_hw_control(adapter);
2563 igb_power_down_link(adapter);
2564 igb_free_all_rx_resources(adapter);
2565 err_setup_rx:
2566 igb_free_all_tx_resources(adapter);
2567 err_setup_tx:
2568 igb_reset(adapter);
2569 if (!resuming)
2570 pm_runtime_put(&pdev->dev);
2572 return err;
2575 static int igb_open(struct net_device *netdev)
2577 return __igb_open(netdev, false);
2581 * igb_close - Disables a network interface
2582 * @netdev: network interface device structure
2584 * Returns 0, this is not allowed to fail
2586 * The close entry point is called when an interface is de-activated
2587 * by the OS. The hardware is still under the driver's control, but
2588 * needs to be disabled. A global MAC reset is issued to stop the
2589 * hardware, and all transmit and receive resources are freed.
2591 static int __igb_close(struct net_device *netdev, bool suspending)
2593 struct igb_adapter *adapter = netdev_priv(netdev);
2594 struct pci_dev *pdev = adapter->pdev;
2596 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2598 if (!suspending)
2599 pm_runtime_get_sync(&pdev->dev);
2601 igb_down(adapter);
2602 igb_free_irq(adapter);
2604 igb_free_all_tx_resources(adapter);
2605 igb_free_all_rx_resources(adapter);
2607 if (!suspending)
2608 pm_runtime_put_sync(&pdev->dev);
2609 return 0;
2612 static int igb_close(struct net_device *netdev)
2614 return __igb_close(netdev, false);
2618 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2619 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2621 * Return 0 on success, negative on failure
2623 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2625 struct device *dev = tx_ring->dev;
2626 int orig_node = dev_to_node(dev);
2627 int size;
2629 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2630 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2631 if (!tx_ring->tx_buffer_info)
2632 tx_ring->tx_buffer_info = vzalloc(size);
2633 if (!tx_ring->tx_buffer_info)
2634 goto err;
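/* Allocation strategy used throughout this function: try the ring's
 * preferred NUMA node first (vzalloc_node above, and the temporary
 * set_dev_node() switch around dma_alloc_coherent below), then fall back
 * to an unconstrained allocation if node-local memory is unavailable.
 */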
2636 /* round up to nearest 4K */
2637 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2638 tx_ring->size = ALIGN(tx_ring->size, 4096);
2640 set_dev_node(dev, tx_ring->numa_node);
2641 tx_ring->desc = dma_alloc_coherent(dev,
2642 tx_ring->size,
2643 &tx_ring->dma,
2644 GFP_KERNEL);
2645 set_dev_node(dev, orig_node);
2646 if (!tx_ring->desc)
2647 tx_ring->desc = dma_alloc_coherent(dev,
2648 tx_ring->size,
2649 &tx_ring->dma,
2650 GFP_KERNEL);
2652 if (!tx_ring->desc)
2653 goto err;
2655 tx_ring->next_to_use = 0;
2656 tx_ring->next_to_clean = 0;
2658 return 0;
2660 err:
2661 vfree(tx_ring->tx_buffer_info);
2662 dev_err(dev,
2663 "Unable to allocate memory for the transmit descriptor ring\n");
2664 return -ENOMEM;
2668 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2669 * (Descriptors) for all queues
2670 * @adapter: board private structure
2672 * Return 0 on success, negative on failure
2674 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2676 struct pci_dev *pdev = adapter->pdev;
2677 int i, err = 0;
2679 for (i = 0; i < adapter->num_tx_queues; i++) {
2680 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2681 if (err) {
2682 dev_err(&pdev->dev,
2683 "Allocation for Tx Queue %u failed\n", i);
2684 for (i--; i >= 0; i--)
2685 igb_free_tx_resources(adapter->tx_ring[i]);
2686 break;
2690 return err;
2694 * igb_setup_tctl - configure the transmit control registers
2695 * @adapter: Board private structure
2697 void igb_setup_tctl(struct igb_adapter *adapter)
2699 struct e1000_hw *hw = &adapter->hw;
2700 u32 tctl;
2702 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2703 wr32(E1000_TXDCTL(0), 0);
2705 /* Program the Transmit Control Register */
2706 tctl = rd32(E1000_TCTL);
2707 tctl &= ~E1000_TCTL_CT;
2708 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2709 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2711 igb_config_collision_dist(hw);
2713 /* Enable transmits */
2714 tctl |= E1000_TCTL_EN;
2716 wr32(E1000_TCTL, tctl);
2720 * igb_configure_tx_ring - Configure transmit ring after Reset
2721 * @adapter: board private structure
2722 * @ring: tx ring to configure
2724 * Configure a transmit ring after a reset.
2726 void igb_configure_tx_ring(struct igb_adapter *adapter,
2727 struct igb_ring *ring)
2729 struct e1000_hw *hw = &adapter->hw;
2730 u32 txdctl = 0;
2731 u64 tdba = ring->dma;
2732 int reg_idx = ring->reg_idx;
2734 /* disable the queue */
2735 wr32(E1000_TXDCTL(reg_idx), 0);
2736 wrfl();
2737 mdelay(10);
2739 wr32(E1000_TDLEN(reg_idx),
2740 ring->count * sizeof(union e1000_adv_tx_desc));
2741 wr32(E1000_TDBAL(reg_idx),
2742 tdba & 0x00000000ffffffffULL);
2743 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2745 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2746 wr32(E1000_TDH(reg_idx), 0);
2747 writel(0, ring->tail);
2749 txdctl |= IGB_TX_PTHRESH;
2750 txdctl |= IGB_TX_HTHRESH << 8;
2751 txdctl |= IGB_TX_WTHRESH << 16;
2753 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2754 wr32(E1000_TXDCTL(reg_idx), txdctl);
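/* For reference, TXDCTL packs three thresholds into one register:
 * PTHRESH (prefetch) in the low bits, HTHRESH (host) at bit 8 and
 * WTHRESH (write-back) at bit 16, with QUEUE_ENABLE set in the same
 * write so the queue only comes up with sane thresholds programmed.
 */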
2758 * igb_configure_tx - Configure transmit Unit after Reset
2759 * @adapter: board private structure
2761 * Configure the Tx unit of the MAC after a reset.
2763 static void igb_configure_tx(struct igb_adapter *adapter)
2765 int i;
2767 for (i = 0; i < adapter->num_tx_queues; i++)
2768 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2772 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2773 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2775 * Returns 0 on success, negative on failure
2777 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2779 struct device *dev = rx_ring->dev;
2780 int orig_node = dev_to_node(dev);
2781 int size, desc_len;
2783 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2784 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2785 if (!rx_ring->rx_buffer_info)
2786 rx_ring->rx_buffer_info = vzalloc(size);
2787 if (!rx_ring->rx_buffer_info)
2788 goto err;
2790 desc_len = sizeof(union e1000_adv_rx_desc);
2792 /* Round up to nearest 4K */
2793 rx_ring->size = rx_ring->count * desc_len;
2794 rx_ring->size = ALIGN(rx_ring->size, 4096);
2796 set_dev_node(dev, rx_ring->numa_node);
2797 rx_ring->desc = dma_alloc_coherent(dev,
2798 rx_ring->size,
2799 &rx_ring->dma,
2800 GFP_KERNEL);
2801 set_dev_node(dev, orig_node);
2802 if (!rx_ring->desc)
2803 rx_ring->desc = dma_alloc_coherent(dev,
2804 rx_ring->size,
2805 &rx_ring->dma,
2806 GFP_KERNEL);
2808 if (!rx_ring->desc)
2809 goto err;
2811 rx_ring->next_to_clean = 0;
2812 rx_ring->next_to_use = 0;
2814 return 0;
2816 err:
2817 vfree(rx_ring->rx_buffer_info);
2818 rx_ring->rx_buffer_info = NULL;
2819 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2820 " ring\n");
2821 return -ENOMEM;
2825 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2826 * (Descriptors) for all queues
2827 * @adapter: board private structure
2829 * Return 0 on success, negative on failure
2831 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2833 struct pci_dev *pdev = adapter->pdev;
2834 int i, err = 0;
2836 for (i = 0; i < adapter->num_rx_queues; i++) {
2837 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2838 if (err) {
2839 dev_err(&pdev->dev,
2840 "Allocation for Rx Queue %u failed\n", i);
2841 for (i--; i >= 0; i--)
2842 igb_free_rx_resources(adapter->rx_ring[i]);
2843 break;
2847 return err;
2851 * igb_setup_mrqc - configure the multiple receive queue control registers
2852 * @adapter: Board private structure
2854 static void igb_setup_mrqc(struct igb_adapter *adapter)
2856 struct e1000_hw *hw = &adapter->hw;
2857 u32 mrqc, rxcsum;
2858 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2859 union e1000_reta {
2860 u32 dword;
2861 u8 bytes[4];
2862 } reta;
2863 static const u8 rsshash[40] = {
2864 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2865 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2866 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2867 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2869 /* Fill out hash function seeds */
2870 for (j = 0; j < 10; j++) {
2871 u32 rsskey = rsshash[(j * 4)];
2872 rsskey |= rsshash[(j * 4) + 1] << 8;
2873 rsskey |= rsshash[(j * 4) + 2] << 16;
2874 rsskey |= rsshash[(j * 4) + 3] << 24;
2875 array_wr32(E1000_RSSRK(0), j, rsskey);
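/* The loop above packs the 40-byte RSS seed into ten 32-bit RSSRK
 * registers, four bytes per register in little-endian order; for example
 * the first four key bytes 0x6d, 0x5a, 0x56, 0xda become
 * RSSRK(0) = 0xda565a6d.
 */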
2878 num_rx_queues = adapter->rss_queues;
2880 if (adapter->vfs_allocated_count) {
2881 /* 82575 and 82576 support 2 RSS queues for VMDq */
2882 switch (hw->mac.type) {
2883 case e1000_i350:
2884 case e1000_82580:
2885 num_rx_queues = 1;
2886 shift = 0;
2887 break;
2888 case e1000_82576:
2889 shift = 3;
2890 num_rx_queues = 2;
2891 break;
2892 case e1000_82575:
2893 shift = 2;
2894 shift2 = 6;
2895 default:
2896 break;
2898 } else {
2899 if (hw->mac.type == e1000_82575)
2900 shift = 6;
2903 for (j = 0; j < (32 * 4); j++) {
2904 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2905 if (shift2)
2906 reta.bytes[j & 3] |= num_rx_queues << shift2;
2907 if ((j & 3) == 3)
2908 wr32(E1000_RETA(j >> 2), reta.dword);
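/* Worked example for the fill above, assuming no VFs and four RSS queues
 * (so shift == 0): reta.bytes cycles 0,1,2,3,0,1,... spreading hash
 * buckets round-robin across the queues, and every fourth iteration the
 * assembled dword is written out, covering all 128 entries in 32 RETA
 * registers.
 */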
2912 * Disable raw packet checksumming so that the RSS hash is placed in
2913 * the descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2914 * offloads as they are enabled by default.
2916 rxcsum = rd32(E1000_RXCSUM);
2917 rxcsum |= E1000_RXCSUM_PCSD;
2919 if (adapter->hw.mac.type >= e1000_82576)
2920 /* Enable Receive Checksum Offload for SCTP */
2921 rxcsum |= E1000_RXCSUM_CRCOFL;
2923 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2924 wr32(E1000_RXCSUM, rxcsum);
2926 /* If VMDq is enabled then we set the appropriate mode for that, else
2927 * we default to RSS so that an RSS hash is calculated per packet even
2928 * if we are only using one queue */
2929 if (adapter->vfs_allocated_count) {
2930 if (hw->mac.type > e1000_82575) {
2931 /* Set the default pool for the PF's first queue */
2932 u32 vtctl = rd32(E1000_VT_CTL);
2933 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2934 E1000_VT_CTL_DISABLE_DEF_POOL);
2935 vtctl |= adapter->vfs_allocated_count <<
2936 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2937 wr32(E1000_VT_CTL, vtctl);
2939 if (adapter->rss_queues > 1)
2940 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2941 else
2942 mrqc = E1000_MRQC_ENABLE_VMDQ;
2943 } else {
2944 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2946 igb_vmm_control(adapter);
2949 * Generate RSS hash based on TCP port numbers and/or
2950 * IPv4/v6 src and dst addresses since UDP cannot be
2951 * hashed reliably due to IP fragmentation
2953 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2954 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2955 E1000_MRQC_RSS_FIELD_IPV6 |
2956 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2957 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2959 wr32(E1000_MRQC, mrqc);
2963 * igb_setup_rctl - configure the receive control registers
2964 * @adapter: Board private structure
2966 void igb_setup_rctl(struct igb_adapter *adapter)
2968 struct e1000_hw *hw = &adapter->hw;
2969 u32 rctl;
2971 rctl = rd32(E1000_RCTL);
2973 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2974 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2976 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2977 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2980 * enable stripping of CRC. It's unlikely this will break BMC
2981 * redirection as it did with e1000. Newer features require
2982 * that the HW strips the CRC.
2984 rctl |= E1000_RCTL_SECRC;
2986 /* disable store bad packets and clear size bits. */
2987 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2989 /* enable LPE to prevent packets larger than max_frame_size */
2990 rctl |= E1000_RCTL_LPE;
2992 /* disable queue 0 to prevent tail write w/o re-config */
2993 wr32(E1000_RXDCTL(0), 0);
2995 /* Attention!!! For SR-IOV PF driver operations you must enable
2996 * queue drop for all VF and PF queues to prevent head-of-line blocking
2997 * if an untrusted VF does not provide descriptors to hardware.
2999 if (adapter->vfs_allocated_count) {
3000 /* set all queue drop enable bits */
3001 wr32(E1000_QDE, ALL_QUEUES);
3004 wr32(E1000_RCTL, rctl);
3007 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3008 int vfn)
3010 struct e1000_hw *hw = &adapter->hw;
3011 u32 vmolr;
3013 /* if this is a VF (not the PF) and that VF has VLANs enabled,
3014 * increase the size to allow room for a vlan tag */
3015 if (vfn < adapter->vfs_allocated_count &&
3016 adapter->vf_data[vfn].vlans_enabled)
3017 size += VLAN_TAG_SIZE;
3019 vmolr = rd32(E1000_VMOLR(vfn));
3020 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3021 vmolr |= size | E1000_VMOLR_LPE;
3022 wr32(E1000_VMOLR(vfn), vmolr);
3024 return 0;
3028 * igb_rlpml_set - set maximum receive packet size
3029 * @adapter: board private structure
3031 * Configure maximum receivable packet size.
3033 static void igb_rlpml_set(struct igb_adapter *adapter)
3035 u32 max_frame_size = adapter->max_frame_size;
3036 struct e1000_hw *hw = &adapter->hw;
3037 u16 pf_id = adapter->vfs_allocated_count;
3039 if (pf_id) {
3040 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3042 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3043 * to our max jumbo frame size, in case we need to enable
3044 * jumbo frames on one of the rings later.
3045 * This will not pass over-length frames into the default
3046 * queue because it's gated by the VMOLR.RLPML.
3048 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3051 wr32(E1000_RLPML, max_frame_size);
3054 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3055 int vfn, bool aupe)
3057 struct e1000_hw *hw = &adapter->hw;
3058 u32 vmolr;
3061 * This register exists only on 82576 and newer, so on older
3062 * hardware exit and do nothing.
3064 if (hw->mac.type < e1000_82576)
3065 return;
3067 vmolr = rd32(E1000_VMOLR(vfn));
3068 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3069 if (aupe)
3070 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3071 else
3072 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3074 /* clear all bits that might not be set */
3075 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3077 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3078 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3080 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3081 * multicast packets
3083 if (vfn <= adapter->vfs_allocated_count)
3084 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3086 wr32(E1000_VMOLR(vfn), vmolr);
3090 * igb_configure_rx_ring - Configure a receive ring after Reset
3091 * @adapter: board private structure
3092 * @ring: receive ring to be configured
3094 * Configure the Rx unit of the MAC after a reset.
3096 void igb_configure_rx_ring(struct igb_adapter *adapter,
3097 struct igb_ring *ring)
3099 struct e1000_hw *hw = &adapter->hw;
3100 u64 rdba = ring->dma;
3101 int reg_idx = ring->reg_idx;
3102 u32 srrctl = 0, rxdctl = 0;
3104 /* disable the queue */
3105 wr32(E1000_RXDCTL(reg_idx), 0);
3107 /* Set DMA base address registers */
3108 wr32(E1000_RDBAL(reg_idx),
3109 rdba & 0x00000000ffffffffULL);
3110 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3111 wr32(E1000_RDLEN(reg_idx),
3112 ring->count * sizeof(union e1000_adv_rx_desc));
3114 /* initialize head and tail */
3115 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3116 wr32(E1000_RDH(reg_idx), 0);
3117 writel(0, ring->tail);
3119 /* set descriptor configuration */
3120 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3121 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3122 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3123 #else
3124 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3125 #endif
3126 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3127 if (hw->mac.type >= e1000_82580)
3128 srrctl |= E1000_SRRCTL_TIMESTAMP;
3129 /* Only set Drop Enable if we are supporting multiple queues */
3130 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3131 srrctl |= E1000_SRRCTL_DROP_EN;
3133 wr32(E1000_SRRCTL(reg_idx), srrctl);
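/* SRRCTL above, in outline: header buffers are IGB_RX_HDR_LEN bytes,
 * packet buffers are half a page (capped at 16KB), descriptors use the
 * always-split header format, 82580 and newer also set the timestamp
 * bit, and per-queue drop is only enabled when more than one queue (or
 * SR-IOV) is active so one stalled queue cannot starve the rest.
 */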
3135 /* set filtering for VMDQ pools */
3136 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3138 rxdctl |= IGB_RX_PTHRESH;
3139 rxdctl |= IGB_RX_HTHRESH << 8;
3140 rxdctl |= IGB_RX_WTHRESH << 16;
3142 /* enable receive descriptor fetching */
3143 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3144 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3148 * igb_configure_rx - Configure receive Unit after Reset
3149 * @adapter: board private structure
3151 * Configure the Rx unit of the MAC after a reset.
3153 static void igb_configure_rx(struct igb_adapter *adapter)
3155 int i;
3157 /* set UTA to appropriate mode */
3158 igb_set_uta(adapter);
3160 /* set the correct pool for the PF default MAC address in entry 0 */
3161 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3162 adapter->vfs_allocated_count);
3164 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3165 * the Base and Length of the Rx Descriptor Ring */
3166 for (i = 0; i < adapter->num_rx_queues; i++)
3167 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3171 * igb_free_tx_resources - Free Tx Resources per Queue
3172 * @tx_ring: Tx descriptor ring for a specific queue
3174 * Free all transmit software resources
3176 void igb_free_tx_resources(struct igb_ring *tx_ring)
3178 igb_clean_tx_ring(tx_ring);
3180 vfree(tx_ring->tx_buffer_info);
3181 tx_ring->tx_buffer_info = NULL;
3183 /* if not set, then don't free */
3184 if (!tx_ring->desc)
3185 return;
3187 dma_free_coherent(tx_ring->dev, tx_ring->size,
3188 tx_ring->desc, tx_ring->dma);
3190 tx_ring->desc = NULL;
3194 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3195 * @adapter: board private structure
3197 * Free all transmit software resources
3199 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3201 int i;
3203 for (i = 0; i < adapter->num_tx_queues; i++)
3204 igb_free_tx_resources(adapter->tx_ring[i]);
3207 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3208 struct igb_tx_buffer *tx_buffer)
3210 if (tx_buffer->skb) {
3211 dev_kfree_skb_any(tx_buffer->skb);
3212 if (tx_buffer->dma)
3213 dma_unmap_single(ring->dev,
3214 tx_buffer->dma,
3215 tx_buffer->length,
3216 DMA_TO_DEVICE);
3217 } else if (tx_buffer->dma) {
3218 dma_unmap_page(ring->dev,
3219 tx_buffer->dma,
3220 tx_buffer->length,
3221 DMA_TO_DEVICE);
3223 tx_buffer->next_to_watch = NULL;
3224 tx_buffer->skb = NULL;
3225 tx_buffer->dma = 0;
3226 /* buffer_info must be completely set up in the transmit path */
3230 * igb_clean_tx_ring - Free Tx Buffers
3231 * @tx_ring: ring to be cleaned
3233 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3235 struct igb_tx_buffer *buffer_info;
3236 unsigned long size;
3237 u16 i;
3239 if (!tx_ring->tx_buffer_info)
3240 return;
3241 /* Free all the Tx ring sk_buffs */
3243 for (i = 0; i < tx_ring->count; i++) {
3244 buffer_info = &tx_ring->tx_buffer_info[i];
3245 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3247 netdev_tx_reset_queue(txring_txq(tx_ring));
3249 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3250 memset(tx_ring->tx_buffer_info, 0, size);
3252 /* Zero out the descriptor ring */
3253 memset(tx_ring->desc, 0, tx_ring->size);
3255 tx_ring->next_to_use = 0;
3256 tx_ring->next_to_clean = 0;
3260 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3261 * @adapter: board private structure
3263 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3265 int i;
3267 for (i = 0; i < adapter->num_tx_queues; i++)
3268 igb_clean_tx_ring(adapter->tx_ring[i]);
3272 * igb_free_rx_resources - Free Rx Resources
3273 * @rx_ring: ring to clean the resources from
3275 * Free all receive software resources
3277 void igb_free_rx_resources(struct igb_ring *rx_ring)
3279 igb_clean_rx_ring(rx_ring);
3281 vfree(rx_ring->rx_buffer_info);
3282 rx_ring->rx_buffer_info = NULL;
3284 /* if not set, then don't free */
3285 if (!rx_ring->desc)
3286 return;
3288 dma_free_coherent(rx_ring->dev, rx_ring->size,
3289 rx_ring->desc, rx_ring->dma);
3291 rx_ring->desc = NULL;
3295 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3296 * @adapter: board private structure
3298 * Free all receive software resources
3300 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3302 int i;
3304 for (i = 0; i < adapter->num_rx_queues; i++)
3305 igb_free_rx_resources(adapter->rx_ring[i]);
3309 * igb_clean_rx_ring - Free Rx Buffers per Queue
3310 * @rx_ring: ring to free buffers from
3312 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3314 unsigned long size;
3315 u16 i;
3317 if (!rx_ring->rx_buffer_info)
3318 return;
3320 /* Free all the Rx ring sk_buffs */
3321 for (i = 0; i < rx_ring->count; i++) {
3322 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3323 if (buffer_info->dma) {
3324 dma_unmap_single(rx_ring->dev,
3325 buffer_info->dma,
3326 IGB_RX_HDR_LEN,
3327 DMA_FROM_DEVICE);
3328 buffer_info->dma = 0;
3331 if (buffer_info->skb) {
3332 dev_kfree_skb(buffer_info->skb);
3333 buffer_info->skb = NULL;
3335 if (buffer_info->page_dma) {
3336 dma_unmap_page(rx_ring->dev,
3337 buffer_info->page_dma,
3338 PAGE_SIZE / 2,
3339 DMA_FROM_DEVICE);
3340 buffer_info->page_dma = 0;
3342 if (buffer_info->page) {
3343 put_page(buffer_info->page);
3344 buffer_info->page = NULL;
3345 buffer_info->page_offset = 0;
3349 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3350 memset(rx_ring->rx_buffer_info, 0, size);
3352 /* Zero out the descriptor ring */
3353 memset(rx_ring->desc, 0, rx_ring->size);
3355 rx_ring->next_to_clean = 0;
3356 rx_ring->next_to_use = 0;
3360 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3361 * @adapter: board private structure
3363 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3365 int i;
3367 for (i = 0; i < adapter->num_rx_queues; i++)
3368 igb_clean_rx_ring(adapter->rx_ring[i]);
3372 * igb_set_mac - Change the Ethernet Address of the NIC
3373 * @netdev: network interface device structure
3374 * @p: pointer to an address structure
3376 * Returns 0 on success, negative on failure
3378 static int igb_set_mac(struct net_device *netdev, void *p)
3380 struct igb_adapter *adapter = netdev_priv(netdev);
3381 struct e1000_hw *hw = &adapter->hw;
3382 struct sockaddr *addr = p;
3384 if (!is_valid_ether_addr(addr->sa_data))
3385 return -EADDRNOTAVAIL;
3387 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3388 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3390 /* set the correct pool for the new PF MAC address in entry 0 */
3391 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3392 adapter->vfs_allocated_count);
3394 return 0;
3398 * igb_write_mc_addr_list - write multicast addresses to MTA
3399 * @netdev: network interface device structure
3401 * Writes multicast address list to the MTA hash table.
3402 * Returns: -ENOMEM on failure
3403 * 0 on no addresses written
3404 * X on writing X addresses to MTA
3406 static int igb_write_mc_addr_list(struct net_device *netdev)
3408 struct igb_adapter *adapter = netdev_priv(netdev);
3409 struct e1000_hw *hw = &adapter->hw;
3410 struct netdev_hw_addr *ha;
3411 u8 *mta_list;
3412 int i;
3414 if (netdev_mc_empty(netdev)) {
3415 /* nothing to program, so clear mc list */
3416 igb_update_mc_addr_list(hw, NULL, 0);
3417 igb_restore_vf_multicasts(adapter);
3418 return 0;
3421 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3422 if (!mta_list)
3423 return -ENOMEM;
3425 /* The shared function expects a packed array of only addresses. */
3426 i = 0;
3427 netdev_for_each_mc_addr(ha, netdev)
3428 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3430 igb_update_mc_addr_list(hw, mta_list, i);
3431 kfree(mta_list);
3433 return netdev_mc_count(netdev);
3437 * igb_write_uc_addr_list - write unicast addresses to RAR table
3438 * @netdev: network interface device structure
3440 * Writes unicast address list to the RAR table.
3441 * Returns: -ENOMEM on failure/insufficient address space
3442 * 0 on no addresses written
3443 * X on writing X addresses to the RAR table
3445 static int igb_write_uc_addr_list(struct net_device *netdev)
3447 struct igb_adapter *adapter = netdev_priv(netdev);
3448 struct e1000_hw *hw = &adapter->hw;
3449 unsigned int vfn = adapter->vfs_allocated_count;
3450 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
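/* rar_entry_count is trimmed by (vfn + 1) here because entry 0 carries
 * the PF's own MAC address (set during igb_configure_rx) and, when
 * SR-IOV is active, one entry per VF is set aside for the VF MAC
 * filters; what remains is available for extra unicast addresses.
 */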
3451 int count = 0;
3453 /* return ENOMEM indicating insufficient memory for addresses */
3454 if (netdev_uc_count(netdev) > rar_entries)
3455 return -ENOMEM;
3457 if (!netdev_uc_empty(netdev) && rar_entries) {
3458 struct netdev_hw_addr *ha;
3460 netdev_for_each_uc_addr(ha, netdev) {
3461 if (!rar_entries)
3462 break;
3463 igb_rar_set_qsel(adapter, ha->addr,
3464 rar_entries--,
3465 vfn);
3466 count++;
3469 /* write the addresses in reverse order to avoid write combining */
3470 for (; rar_entries > 0 ; rar_entries--) {
3471 wr32(E1000_RAH(rar_entries), 0);
3472 wr32(E1000_RAL(rar_entries), 0);
3474 wrfl();
3476 return count;
3480 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3481 * @netdev: network interface device structure
3483 * The set_rx_mode entry point is called whenever the unicast or multicast
3484 * address lists or the network interface flags are updated. This routine is
3485 * responsible for configuring the hardware for proper unicast, multicast,
3486 * promiscuous mode, and all-multi behavior.
3488 static void igb_set_rx_mode(struct net_device *netdev)
3490 struct igb_adapter *adapter = netdev_priv(netdev);
3491 struct e1000_hw *hw = &adapter->hw;
3492 unsigned int vfn = adapter->vfs_allocated_count;
3493 u32 rctl, vmolr = 0;
3494 int count;
3496 /* Check for Promiscuous and All Multicast modes */
3497 rctl = rd32(E1000_RCTL);
3499 /* clear the affected bits */
3500 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3502 if (netdev->flags & IFF_PROMISC) {
3503 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3504 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3505 } else {
3506 if (netdev->flags & IFF_ALLMULTI) {
3507 rctl |= E1000_RCTL_MPE;
3508 vmolr |= E1000_VMOLR_MPME;
3509 } else {
3511 * Write addresses to the MTA; if the attempt fails,
3512 * then just turn on multicast promiscuous mode so
3513 * that we can at least receive multicast traffic.
3515 count = igb_write_mc_addr_list(netdev);
3516 if (count < 0) {
3517 rctl |= E1000_RCTL_MPE;
3518 vmolr |= E1000_VMOLR_MPME;
3519 } else if (count) {
3520 vmolr |= E1000_VMOLR_ROMPE;
3524 * Write addresses to the available RAR registers; if there is not
3525 * sufficient space to store all the addresses, then enable
3526 * unicast promiscuous mode.
3528 count = igb_write_uc_addr_list(netdev);
3529 if (count < 0) {
3530 rctl |= E1000_RCTL_UPE;
3531 vmolr |= E1000_VMOLR_ROPE;
3533 rctl |= E1000_RCTL_VFE;
3535 wr32(E1000_RCTL, rctl);
3538 * In order to support SR-IOV and eventually VMDq it is necessary to set
3539 * the VMOLR to enable the appropriate modes.  Without this workaround,
3540 * VLAN tag stripping would not be done for frames that arrive only
3541 * because we are the default pool.
3543 if (hw->mac.type < e1000_82576)
3544 return;
3546 vmolr |= rd32(E1000_VMOLR(vfn)) &
3547 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3548 wr32(E1000_VMOLR(vfn), vmolr);
3549 igb_restore_vf_multicasts(adapter);
3552 static void igb_check_wvbr(struct igb_adapter *adapter)
3554 struct e1000_hw *hw = &adapter->hw;
3555 u32 wvbr = 0;
3557 switch (hw->mac.type) {
3558 case e1000_82576:
3559 case e1000_i350:
3560 if (!(wvbr = rd32(E1000_WVBR)))
3561 return;
3562 break;
3563 default:
3564 break;
3567 adapter->wvbr |= wvbr;
3570 #define IGB_STAGGERED_QUEUE_OFFSET 8
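/* A note on the offset above: WVBR reports wrong-VM-behaviour (spoof)
 * events, and the handling below assumes one bit per queue, with a VF
 * pool's two queues sitting 8 bit positions apart, which is why both
 * bit j and bit (j + IGB_STAGGERED_QUEUE_OFFSET) are tested and cleared
 * for VF j.
 */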
3572 static void igb_spoof_check(struct igb_adapter *adapter)
3574 int j;
3576 if (!adapter->wvbr)
3577 return;
3579 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3580 if (adapter->wvbr & (1 << j) ||
3581 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3582 dev_warn(&adapter->pdev->dev,
3583 "Spoof event(s) detected on VF %d\n", j);
3584 adapter->wvbr &=
3585 ~((1 << j) |
3586 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3591 /* Need to wait a few seconds after link up to get diagnostic information from
3592 * the phy */
3593 static void igb_update_phy_info(unsigned long data)
3595 struct igb_adapter *adapter = (struct igb_adapter *) data;
3596 igb_get_phy_info(&adapter->hw);
3600 * igb_has_link - check shared code for link and determine up/down
3601 * @adapter: pointer to driver private info
3603 bool igb_has_link(struct igb_adapter *adapter)
3605 struct e1000_hw *hw = &adapter->hw;
3606 bool link_active = false;
3607 s32 ret_val = 0;
3609 /* get_link_status is set on an LSC (link status change) interrupt or
3610 * an rx sequence error interrupt.  It remains set until
3611 * e1000_check_for_link establishes link; this applies to
3612 * copper adapters ONLY
3614 switch (hw->phy.media_type) {
3615 case e1000_media_type_copper:
3616 if (hw->mac.get_link_status) {
3617 ret_val = hw->mac.ops.check_for_link(hw);
3618 link_active = !hw->mac.get_link_status;
3619 } else {
3620 link_active = true;
3622 break;
3623 case e1000_media_type_internal_serdes:
3624 ret_val = hw->mac.ops.check_for_link(hw);
3625 link_active = hw->mac.serdes_has_link;
3626 break;
3627 default:
3628 case e1000_media_type_unknown:
3629 break;
3632 return link_active;
3635 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3637 bool ret = false;
3638 u32 ctrl_ext, thstat;
3640 /* check for thermal sensor event on i350, copper only */
3641 if (hw->mac.type == e1000_i350) {
3642 thstat = rd32(E1000_THSTAT);
3643 ctrl_ext = rd32(E1000_CTRL_EXT);
3645 if ((hw->phy.media_type == e1000_media_type_copper) &&
3646 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3647 ret = !!(thstat & event);
3651 return ret;
3655 * igb_watchdog - Timer Call-back
3656 * @data: pointer to adapter cast into an unsigned long
3658 static void igb_watchdog(unsigned long data)
3660 struct igb_adapter *adapter = (struct igb_adapter *)data;
3661 /* Do the rest outside of interrupt context */
3662 schedule_work(&adapter->watchdog_task);
3665 static void igb_watchdog_task(struct work_struct *work)
3667 struct igb_adapter *adapter = container_of(work,
3668 struct igb_adapter,
3669 watchdog_task);
3670 struct e1000_hw *hw = &adapter->hw;
3671 struct net_device *netdev = adapter->netdev;
3672 u32 link;
3673 int i;
3675 link = igb_has_link(adapter);
3676 if (link) {
3677 /* Cancel scheduled suspend requests. */
3678 pm_runtime_resume(netdev->dev.parent);
3680 if (!netif_carrier_ok(netdev)) {
3681 u32 ctrl;
3682 hw->mac.ops.get_speed_and_duplex(hw,
3683 &adapter->link_speed,
3684 &adapter->link_duplex);
3686 ctrl = rd32(E1000_CTRL);
3687 /* Link status message must follow this format */
3688 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3689 "Duplex, Flow Control: %s\n",
3690 netdev->name,
3691 adapter->link_speed,
3692 adapter->link_duplex == FULL_DUPLEX ?
3693 "Full" : "Half",
3694 (ctrl & E1000_CTRL_TFCE) &&
3695 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3696 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3697 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3699 /* check for thermal sensor event */
3700 if (igb_thermal_sensor_event(hw,
3701 E1000_THSTAT_LINK_THROTTLE)) {
3702 netdev_info(netdev, "The network adapter link "
3703 "speed was downshifted because it "
3704 "overheated\n");
3707 /* adjust timeout factor according to speed/duplex */
3708 adapter->tx_timeout_factor = 1;
3709 switch (adapter->link_speed) {
3710 case SPEED_10:
3711 adapter->tx_timeout_factor = 14;
3712 break;
3713 case SPEED_100:
3714 /* maybe add some timeout factor ? */
3715 break;
3718 netif_carrier_on(netdev);
3720 igb_ping_all_vfs(adapter);
3721 igb_check_vf_rate_limit(adapter);
3723 /* link state has changed, schedule phy info update */
3724 if (!test_bit(__IGB_DOWN, &adapter->state))
3725 mod_timer(&adapter->phy_info_timer,
3726 round_jiffies(jiffies + 2 * HZ));
3728 } else {
3729 if (netif_carrier_ok(netdev)) {
3730 adapter->link_speed = 0;
3731 adapter->link_duplex = 0;
3733 /* check for thermal sensor event */
3734 if (igb_thermal_sensor_event(hw,
3735 E1000_THSTAT_PWR_DOWN)) {
3736 netdev_err(netdev, "The network adapter was "
3737 "stopped because it overheated\n");
3740 /* Link status message must follow this format */
3741 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3742 netdev->name);
3743 netif_carrier_off(netdev);
3745 igb_ping_all_vfs(adapter);
3747 /* link state has changed, schedule phy info update */
3748 if (!test_bit(__IGB_DOWN, &adapter->state))
3749 mod_timer(&adapter->phy_info_timer,
3750 round_jiffies(jiffies + 2 * HZ));
3752 pm_schedule_suspend(netdev->dev.parent,
3753 MSEC_PER_SEC * 5);
3757 spin_lock(&adapter->stats64_lock);
3758 igb_update_stats(adapter, &adapter->stats64);
3759 spin_unlock(&adapter->stats64_lock);
3761 for (i = 0; i < adapter->num_tx_queues; i++) {
3762 struct igb_ring *tx_ring = adapter->tx_ring[i];
3763 if (!netif_carrier_ok(netdev)) {
3764 /* We've lost link, so the controller stops DMA,
3765 * but we've got queued Tx work that's never going
3766 * to get done, so reset the controller to flush Tx.
3767 * (Do the reset outside of interrupt context). */
3768 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3769 adapter->tx_timeout_count++;
3770 schedule_work(&adapter->reset_task);
3771 /* return immediately since reset is imminent */
3772 return;
3776 /* Force detection of hung controller every watchdog period */
3777 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3780 /* Cause software interrupt to ensure rx ring is cleaned */
3781 if (adapter->msix_entries) {
3782 u32 eics = 0;
3783 for (i = 0; i < adapter->num_q_vectors; i++)
3784 eics |= adapter->q_vector[i]->eims_value;
3785 wr32(E1000_EICS, eics);
3786 } else {
3787 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3790 igb_spoof_check(adapter);
3792 /* Reset the timer */
3793 if (!test_bit(__IGB_DOWN, &adapter->state))
3794 mod_timer(&adapter->watchdog_timer,
3795 round_jiffies(jiffies + 2 * HZ));
3798 enum latency_range {
3799 lowest_latency = 0,
3800 low_latency = 1,
3801 bulk_latency = 2,
3802 latency_invalid = 255
3806 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3808 * Stores a new ITR value based strictly on packet size.  This
3809 * algorithm is less sophisticated than that used in igb_update_itr,
3810 * due to the difficulty of synchronizing statistics across multiple
3811 * receive rings. The divisors and thresholds used by this function
3812 * were determined based on theoretical maximum wire speed and testing
3813 * data, in order to minimize response time while increasing bulk
3814 * throughput.
3815 * This functionality is controlled by the InterruptThrottleRate module
3816 * parameter (see igb_param.c)
3817 * NOTE: This function is called only when operating in a multiqueue
3818 * receive environment.
3819 * @q_vector: pointer to q_vector
3821 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3823 int new_val = q_vector->itr_val;
3824 int avg_wire_size = 0;
3825 struct igb_adapter *adapter = q_vector->adapter;
3826 unsigned int packets;
3828 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3829 * ints/sec - ITR timer value of 120 ticks.
3831 if (adapter->link_speed != SPEED_1000) {
3832 new_val = IGB_4K_ITR;
3833 goto set_itr_val;
3836 packets = q_vector->rx.total_packets;
3837 if (packets)
3838 avg_wire_size = q_vector->rx.total_bytes / packets;
3840 packets = q_vector->tx.total_packets;
3841 if (packets)
3842 avg_wire_size = max_t(u32, avg_wire_size,
3843 q_vector->tx.total_bytes / packets);
3845 /* if avg_wire_size isn't set no work was done */
3846 if (!avg_wire_size)
3847 goto clear_counts;
3849 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3850 avg_wire_size += 24;
3852 /* Don't starve jumbo frames */
3853 avg_wire_size = min(avg_wire_size, 3000);
3855 /* Give a little boost to mid-size frames */
3856 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3857 new_val = avg_wire_size / 3;
3858 else
3859 new_val = avg_wire_size / 2;
3861 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3862 if (new_val < IGB_20K_ITR &&
3863 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3864 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3865 new_val = IGB_20K_ITR;
3867 set_itr_val:
3868 if (new_val != q_vector->itr_val) {
3869 q_vector->itr_val = new_val;
3870 q_vector->set_itr = 1;
3872 clear_counts:
3873 q_vector->rx.total_bytes = 0;
3874 q_vector->rx.total_packets = 0;
3875 q_vector->tx.total_bytes = 0;
3876 q_vector->tx.total_packets = 0;
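/* Worked example of the sizing logic above: 25 received packets
 * totalling 30000 bytes give avg_wire_size = 1200; adding the 24
 * bytes of CRC/preamble/gap yields 1224, which falls outside the
 * 300..1200 mid-size window, so new_val = 1224 / 2 = 612.  Only when
 * new_val ends up below IGB_20K_ITR while conservative mode
 * (itr setting 3) is active is it clamped back up to IGB_20K_ITR.
 */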
3880 * igb_update_itr - update the dynamic ITR value based on statistics
3881 * Stores a new ITR value based on packets and byte
3882 * counts during the last interrupt. The advantage of per interrupt
3883 * computation is faster updates and more accurate ITR for the current
3884 * traffic pattern. Constants in this function were computed
3885 * based on theoretical maximum wire speed and thresholds were set based
3886 * on testing data as well as attempting to minimize response time
3887 * while increasing bulk throughput.
3888 * This functionality is controlled by the InterruptThrottleRate module
3889 * parameter (see igb_param.c)
3890 * NOTE: These calculations are only valid when operating in a single-
3891 * queue environment.
3892 * @q_vector: pointer to q_vector
3893 * @ring_container: ring info to update the itr for
3895 static void igb_update_itr(struct igb_q_vector *q_vector,
3896 struct igb_ring_container *ring_container)
3898 unsigned int packets = ring_container->total_packets;
3899 unsigned int bytes = ring_container->total_bytes;
3900 u8 itrval = ring_container->itr;
3902 /* no packets, exit with status unchanged */
3903 if (packets == 0)
3904 return;
3906 switch (itrval) {
3907 case lowest_latency:
3908 /* handle TSO and jumbo frames */
3909 if (bytes/packets > 8000)
3910 itrval = bulk_latency;
3911 else if ((packets < 5) && (bytes > 512))
3912 itrval = low_latency;
3913 break;
3914 case low_latency: /* 50 usec aka 20000 ints/s */
3915 if (bytes > 10000) {
3916 /* this if handles the TSO accounting */
3917 if (bytes/packets > 8000) {
3918 itrval = bulk_latency;
3919 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3920 itrval = bulk_latency;
3921 } else if ((packets > 35)) {
3922 itrval = lowest_latency;
3924 } else if (bytes/packets > 2000) {
3925 itrval = bulk_latency;
3926 } else if (packets <= 2 && bytes < 512) {
3927 itrval = lowest_latency;
3929 break;
3930 case bulk_latency: /* 250 usec aka 4000 ints/s */
3931 if (bytes > 25000) {
3932 if (packets > 35)
3933 itrval = low_latency;
3934 } else if (bytes < 1500) {
3935 itrval = low_latency;
3937 break;
3940 /* clear work counters since we have the values we need */
3941 ring_container->total_bytes = 0;
3942 ring_container->total_packets = 0;
3944 /* write updated itr to ring container */
3945 ring_container->itr = itrval;
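/* Example walk through the thresholds above: a ring sitting in
 * low_latency that saw 60 packets totalling 15000 bytes since the
 * last interrupt has bytes > 10000, bytes/packets = 250 (so neither
 * the TSO nor the bulk branch fires) and packets > 35, so it is
 * promoted to lowest_latency.  The same ring with 20 packets and
 * 8000 bytes matches none of the branches and keeps its current
 * range.
 */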
3948 static void igb_set_itr(struct igb_q_vector *q_vector)
3950 struct igb_adapter *adapter = q_vector->adapter;
3951 u32 new_itr = q_vector->itr_val;
3952 u8 current_itr = 0;
3954 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3955 if (adapter->link_speed != SPEED_1000) {
3956 current_itr = 0;
3957 new_itr = IGB_4K_ITR;
3958 goto set_itr_now;
3961 igb_update_itr(q_vector, &q_vector->tx);
3962 igb_update_itr(q_vector, &q_vector->rx);
3964 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3966 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3967 if (current_itr == lowest_latency &&
3968 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3969 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3970 current_itr = low_latency;
3972 switch (current_itr) {
3973 /* counts and packets in update_itr are dependent on these numbers */
3974 case lowest_latency:
3975 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3976 break;
3977 case low_latency:
3978 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3979 break;
3980 case bulk_latency:
3981 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3982 break;
3983 default:
3984 break;
3987 set_itr_now:
3988 if (new_itr != q_vector->itr_val) {
3989 /* this attempts to bias the interrupt rate towards Bulk
3990 * by adding intermediate steps when interrupt rate is
3991 * increasing */
3992 new_itr = new_itr > q_vector->itr_val ?
3993 max((new_itr * q_vector->itr_val) /
3994 (new_itr + (q_vector->itr_val >> 2)),
3995 new_itr) :
3996 new_itr;
3997 /* Don't write the value here; it resets the adapter's
3998 * internal timer, and causes us to delay far longer than
3999 * we should between interrupts. Instead, we write the ITR
4000 * value at the beginning of the next interrupt so the timing
4001 * ends up being correct.
4003 q_vector->itr_val = new_itr;
4004 q_vector->set_itr = 1;
4008 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4009 u32 type_tucmd, u32 mss_l4len_idx)
4011 struct e1000_adv_tx_context_desc *context_desc;
4012 u16 i = tx_ring->next_to_use;
4014 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4016 i++;
4017 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4019 /* set bits to identify this as an advanced context descriptor */
4020 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4022 /* For 82575, context index must be unique per ring. */
4023 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4024 mss_l4len_idx |= tx_ring->reg_idx << 4;
4026 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4027 context_desc->seqnum_seed = 0;
4028 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4029 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4032 static int igb_tso(struct igb_ring *tx_ring,
4033 struct igb_tx_buffer *first,
4034 u8 *hdr_len)
4036 struct sk_buff *skb = first->skb;
4037 u32 vlan_macip_lens, type_tucmd;
4038 u32 mss_l4len_idx, l4len;
4040 if (!skb_is_gso(skb))
4041 return 0;
4043 if (skb_header_cloned(skb)) {
4044 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4045 if (err)
4046 return err;
4049 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4050 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4052 if (first->protocol == __constant_htons(ETH_P_IP)) {
4053 struct iphdr *iph = ip_hdr(skb);
4054 iph->tot_len = 0;
4055 iph->check = 0;
4056 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4057 iph->daddr, 0,
4058 IPPROTO_TCP,
4060 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4061 first->tx_flags |= IGB_TX_FLAGS_TSO |
4062 IGB_TX_FLAGS_CSUM |
4063 IGB_TX_FLAGS_IPV4;
4064 } else if (skb_is_gso_v6(skb)) {
4065 ipv6_hdr(skb)->payload_len = 0;
4066 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4067 &ipv6_hdr(skb)->daddr,
4068 0, IPPROTO_TCP, 0);
4069 first->tx_flags |= IGB_TX_FLAGS_TSO |
4070 IGB_TX_FLAGS_CSUM;
4073 /* compute header lengths */
4074 l4len = tcp_hdrlen(skb);
4075 *hdr_len = skb_transport_offset(skb) + l4len;
4077 /* update gso size and bytecount with header size */
4078 first->gso_segs = skb_shinfo(skb)->gso_segs;
4079 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4081 /* MSS L4LEN IDX */
4082 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4083 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4085 /* VLAN MACLEN IPLEN */
4086 vlan_macip_lens = skb_network_header_len(skb);
4087 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4088 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4090 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4092 return 1;
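/* Sketch of the resulting context descriptor for a common case,
 * assuming an untagged IPv4/TCP frame with a 14 byte Ethernet
 * header, 20 byte IP header, 20 byte TCP header and an MSS of 1448:
 * l4len = 20, *hdr_len = 34 + 20 = 54,
 * vlan_macip_lens = 20 | (14 << E1000_ADVTXD_MACLEN_SHIFT),
 * mss_l4len_idx = (20 << E1000_ADVTXD_L4LEN_SHIFT) |
 *                 (1448 << E1000_ADVTXD_MSS_SHIFT),
 * and first->bytecount grows by (gso_segs - 1) * 54 so the stats
 * reflect the on-wire header replication.
 */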
4095 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4097 struct sk_buff *skb = first->skb;
4098 u32 vlan_macip_lens = 0;
4099 u32 mss_l4len_idx = 0;
4100 u32 type_tucmd = 0;
4102 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4103 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4104 return;
4105 } else {
4106 u8 l4_hdr = 0;
4107 switch (first->protocol) {
4108 case __constant_htons(ETH_P_IP):
4109 vlan_macip_lens |= skb_network_header_len(skb);
4110 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4111 l4_hdr = ip_hdr(skb)->protocol;
4112 break;
4113 case __constant_htons(ETH_P_IPV6):
4114 vlan_macip_lens |= skb_network_header_len(skb);
4115 l4_hdr = ipv6_hdr(skb)->nexthdr;
4116 break;
4117 default:
4118 if (unlikely(net_ratelimit())) {
4119 dev_warn(tx_ring->dev,
4120 "partial checksum but proto=%x!\n",
4121 first->protocol);
4123 break;
4126 switch (l4_hdr) {
4127 case IPPROTO_TCP:
4128 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4129 mss_l4len_idx = tcp_hdrlen(skb) <<
4130 E1000_ADVTXD_L4LEN_SHIFT;
4131 break;
4132 case IPPROTO_SCTP:
4133 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4134 mss_l4len_idx = sizeof(struct sctphdr) <<
4135 E1000_ADVTXD_L4LEN_SHIFT;
4136 break;
4137 case IPPROTO_UDP:
4138 mss_l4len_idx = sizeof(struct udphdr) <<
4139 E1000_ADVTXD_L4LEN_SHIFT;
4140 break;
4141 default:
4142 if (unlikely(net_ratelimit())) {
4143 dev_warn(tx_ring->dev,
4144 "partial checksum but l4 proto=%x!\n",
4145 l4_hdr);
4147 break;
4150 /* update TX checksum flag */
4151 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4154 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4155 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4157 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
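/* For a non-TSO CHECKSUM_PARTIAL packet the context descriptor built
 * above only carries lengths and the L4 type: TCP over IPv4 sets
 * E1000_ADVTXD_TUCMD_IPV4 plus the TCP L4T bit with tcp_hdrlen() in
 * the L4LEN field, while UDP only reports sizeof(struct udphdr) and
 * relies on the default (zero) L4 type encoding.  In every offloaded
 * case IGB_TX_FLAGS_CSUM is set so igb_tx_olinfo_status() later
 * requests checksum insertion in the data descriptor.
 */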
4160 static __le32 igb_tx_cmd_type(u32 tx_flags)
4162 /* set type for advanced descriptor with frame checksum insertion */
4163 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4164 E1000_ADVTXD_DCMD_IFCS |
4165 E1000_ADVTXD_DCMD_DEXT);
4167 /* set HW vlan bit if vlan is present */
4168 if (tx_flags & IGB_TX_FLAGS_VLAN)
4169 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4171 /* set timestamp bit if present */
4172 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4173 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4175 /* set segmentation bits for TSO */
4176 if (tx_flags & IGB_TX_FLAGS_TSO)
4177 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4179 return cmd_type;
4182 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4183 union e1000_adv_tx_desc *tx_desc,
4184 u32 tx_flags, unsigned int paylen)
4186 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4188 /* 82575 requires a unique index per ring if any offload is enabled */
4189 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4190 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4191 olinfo_status |= tx_ring->reg_idx << 4;
4193 /* insert L4 checksum */
4194 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4195 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4197 /* insert IPv4 checksum */
4198 if (tx_flags & IGB_TX_FLAGS_IPV4)
4199 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4202 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4206 * The largest size we can write to the descriptor is 65535. In order to
4207 * maintain a power of two alignment we have to limit ourselves to 32K.
4209 #define IGB_MAX_TXD_PWR 15
4210 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
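/* With IGB_MAX_TXD_PWR = 15 this caps each data descriptor at 32768
 * bytes.  As an example, a single 60000 byte buffer is emitted by the
 * loop in igb_tx_map() below as one 32768 byte descriptor followed by
 * a 27232 byte descriptor, keeping every length within what the
 * hardware accepts while preserving the power-of-two alignment noted
 * above.
 */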
4212 static void igb_tx_map(struct igb_ring *tx_ring,
4213 struct igb_tx_buffer *first,
4214 const u8 hdr_len)
4216 struct sk_buff *skb = first->skb;
4217 struct igb_tx_buffer *tx_buffer_info;
4218 union e1000_adv_tx_desc *tx_desc;
4219 dma_addr_t dma;
4220 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4221 unsigned int data_len = skb->data_len;
4222 unsigned int size = skb_headlen(skb);
4223 unsigned int paylen = skb->len - hdr_len;
4224 __le32 cmd_type;
4225 u32 tx_flags = first->tx_flags;
4226 u16 i = tx_ring->next_to_use;
4228 tx_desc = IGB_TX_DESC(tx_ring, i);
4230 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4231 cmd_type = igb_tx_cmd_type(tx_flags);
4233 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4234 if (dma_mapping_error(tx_ring->dev, dma))
4235 goto dma_error;
4237 /* record length, and DMA address */
4238 first->length = size;
4239 first->dma = dma;
4240 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4242 for (;;) {
4243 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4244 tx_desc->read.cmd_type_len =
4245 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4247 i++;
4248 tx_desc++;
4249 if (i == tx_ring->count) {
4250 tx_desc = IGB_TX_DESC(tx_ring, 0);
4251 i = 0;
4254 dma += IGB_MAX_DATA_PER_TXD;
4255 size -= IGB_MAX_DATA_PER_TXD;
4257 tx_desc->read.olinfo_status = 0;
4258 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4261 if (likely(!data_len))
4262 break;
4264 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4266 i++;
4267 tx_desc++;
4268 if (i == tx_ring->count) {
4269 tx_desc = IGB_TX_DESC(tx_ring, 0);
4270 i = 0;
4273 size = skb_frag_size(frag);
4274 data_len -= size;
4276 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4277 size, DMA_TO_DEVICE);
4278 if (dma_mapping_error(tx_ring->dev, dma))
4279 goto dma_error;
4281 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4282 tx_buffer_info->length = size;
4283 tx_buffer_info->dma = dma;
4285 tx_desc->read.olinfo_status = 0;
4286 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4288 frag++;
4291 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4293 /* write last descriptor with RS and EOP bits */
4294 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4295 tx_desc->read.cmd_type_len = cmd_type;
4297 /* set the timestamp */
4298 first->time_stamp = jiffies;
4301 * Force memory writes to complete before letting h/w know there
4302 * are new descriptors to fetch. (Only applicable for weak-ordered
4303 * memory model archs, such as IA-64).
4305 * We also need this memory barrier to make certain all of the
4306 * status bits have been updated before next_to_watch is written.
4308 wmb();
4310 /* set next_to_watch value indicating a packet is present */
4311 first->next_to_watch = tx_desc;
4313 i++;
4314 if (i == tx_ring->count)
4315 i = 0;
4317 tx_ring->next_to_use = i;
4319 writel(i, tx_ring->tail);
4321 /* we need this if more than one processor can write to our tail
4322 * at a time; it synchronizes IO on IA64/Altix systems */
4323 mmiowb();
4325 return;
4327 dma_error:
4328 dev_err(tx_ring->dev, "TX DMA map failed\n");
4330 /* clear dma mappings for failed tx_buffer_info map */
4331 for (;;) {
4332 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4333 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4334 if (tx_buffer_info == first)
4335 break;
4336 if (i == 0)
4337 i = tx_ring->count;
4338 i--;
4341 tx_ring->next_to_use = i;
4344 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4346 struct net_device *netdev = tx_ring->netdev;
4348 netif_stop_subqueue(netdev, tx_ring->queue_index);
4350 /* Herbert's original patch had:
4351 * smp_mb__after_netif_stop_queue();
4352 * but since that doesn't exist yet, just open code it. */
4353 smp_mb();
4355 /* We need to check again in case another CPU has just
4356 * made room available. */
4357 if (igb_desc_unused(tx_ring) < size)
4358 return -EBUSY;
4360 /* A reprieve! */
4361 netif_wake_subqueue(netdev, tx_ring->queue_index);
4363 u64_stats_update_begin(&tx_ring->tx_syncp2);
4364 tx_ring->tx_stats.restart_queue2++;
4365 u64_stats_update_end(&tx_ring->tx_syncp2);
4367 return 0;
4370 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4372 if (igb_desc_unused(tx_ring) >= size)
4373 return 0;
4374 return __igb_maybe_stop_tx(tx_ring, size);
4377 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4378 struct igb_ring *tx_ring)
4380 struct igb_tx_buffer *first;
4381 int tso;
4382 u32 tx_flags = 0;
4383 __be16 protocol = vlan_get_protocol(skb);
4384 u8 hdr_len = 0;
4386 /* need: 1 descriptor per page,
4387 * + 2 desc gap to keep tail from touching head,
4388 * + 1 desc for skb->data,
4389 * + 1 desc for context descriptor,
4390 * otherwise try next time */
4391 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4392 /* this is a hard error */
4393 return NETDEV_TX_BUSY;
4396 /* record the location of the first descriptor for this packet */
4397 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4398 first->skb = skb;
4399 first->bytecount = skb->len;
4400 first->gso_segs = 1;
4402 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4403 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4404 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4407 if (vlan_tx_tag_present(skb)) {
4408 tx_flags |= IGB_TX_FLAGS_VLAN;
4409 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4412 /* record initial flags and protocol */
4413 first->tx_flags = tx_flags;
4414 first->protocol = protocol;
4416 tso = igb_tso(tx_ring, first, &hdr_len);
4417 if (tso < 0)
4418 goto out_drop;
4419 else if (!tso)
4420 igb_tx_csum(tx_ring, first);
4422 igb_tx_map(tx_ring, first, hdr_len);
4424 /* Make sure there is space in the ring for the next send. */
4425 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4427 return NETDEV_TX_OK;
4429 out_drop:
4430 igb_unmap_and_free_tx_resource(tx_ring, first);
4432 return NETDEV_TX_OK;
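/* The early igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)
 * call reserves the worst case for this skb: one descriptor per page
 * fragment, one for skb->data, one for a context descriptor and a
 * two descriptor gap so the tail never catches up with the head.
 * The trailing igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4) then
 * stops the queue ahead of time whenever the next maximally
 * fragmented skb could no longer fit, so the hard NETDEV_TX_BUSY
 * path above should rarely be taken.
 */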
4435 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4436 struct sk_buff *skb)
4438 unsigned int r_idx = skb->queue_mapping;
4440 if (r_idx >= adapter->num_tx_queues)
4441 r_idx = r_idx % adapter->num_tx_queues;
4443 return adapter->tx_ring[r_idx];
4446 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4447 struct net_device *netdev)
4449 struct igb_adapter *adapter = netdev_priv(netdev);
4451 if (test_bit(__IGB_DOWN, &adapter->state)) {
4452 dev_kfree_skb_any(skb);
4453 return NETDEV_TX_OK;
4456 if (skb->len <= 0) {
4457 dev_kfree_skb_any(skb);
4458 return NETDEV_TX_OK;
4462 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4463 * in order to meet this minimum size requirement.
4465 if (skb->len < 17) {
4466 if (skb_padto(skb, 17))
4467 return NETDEV_TX_OK;
4468 skb->len = 17;
4471 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4475 * igb_tx_timeout - Respond to a Tx Hang
4476 * @netdev: network interface device structure
4478 static void igb_tx_timeout(struct net_device *netdev)
4480 struct igb_adapter *adapter = netdev_priv(netdev);
4481 struct e1000_hw *hw = &adapter->hw;
4483 /* Do the reset outside of interrupt context */
4484 adapter->tx_timeout_count++;
4486 if (hw->mac.type >= e1000_82580)
4487 hw->dev_spec._82575.global_device_reset = true;
4489 schedule_work(&adapter->reset_task);
4490 wr32(E1000_EICS,
4491 (adapter->eims_enable_mask & ~adapter->eims_other));
4494 static void igb_reset_task(struct work_struct *work)
4496 struct igb_adapter *adapter;
4497 adapter = container_of(work, struct igb_adapter, reset_task);
4499 igb_dump(adapter);
4500 netdev_err(adapter->netdev, "Reset adapter\n");
4501 igb_reinit_locked(adapter);
4505 * igb_get_stats64 - Get System Network Statistics
4506 * @netdev: network interface device structure
4507 * @stats: rtnl_link_stats64 pointer
4510 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4511 struct rtnl_link_stats64 *stats)
4513 struct igb_adapter *adapter = netdev_priv(netdev);
4515 spin_lock(&adapter->stats64_lock);
4516 igb_update_stats(adapter, &adapter->stats64);
4517 memcpy(stats, &adapter->stats64, sizeof(*stats));
4518 spin_unlock(&adapter->stats64_lock);
4520 return stats;
4524 * igb_change_mtu - Change the Maximum Transfer Unit
4525 * @netdev: network interface device structure
4526 * @new_mtu: new value for maximum frame size
4528 * Returns 0 on success, negative on failure
4530 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4532 struct igb_adapter *adapter = netdev_priv(netdev);
4533 struct pci_dev *pdev = adapter->pdev;
4534 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4536 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4537 dev_err(&pdev->dev, "Invalid MTU setting\n");
4538 return -EINVAL;
4541 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4542 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4543 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4544 return -EINVAL;
4547 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4548 msleep(1);
4550 /* igb_down has a dependency on max_frame_size */
4551 adapter->max_frame_size = max_frame;
4553 if (netif_running(netdev))
4554 igb_down(adapter);
4556 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4557 netdev->mtu, new_mtu);
4558 netdev->mtu = new_mtu;
4560 if (netif_running(netdev))
4561 igb_up(adapter);
4562 else
4563 igb_reset(adapter);
4565 clear_bit(__IGB_RESETTING, &adapter->state);
4567 return 0;
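/* Example of the frame-size math above: an MTU of 9000 gives
 * max_frame = 9000 + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN = 9022, which
 * is below MAX_STD_JUMBO_FRAME_SIZE (9238) and is accepted, while an
 * MTU of 9300 gives 9322 and is rejected.  9238 corresponds to the
 * 9216 byte MTU quoted in the error message once the 22 bytes of
 * header, VLAN and FCS overhead are subtracted.
 */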
4571 * igb_update_stats - Update the board statistics counters
4572 * @adapter: board private structure
4575 void igb_update_stats(struct igb_adapter *adapter,
4576 struct rtnl_link_stats64 *net_stats)
4578 struct e1000_hw *hw = &adapter->hw;
4579 struct pci_dev *pdev = adapter->pdev;
4580 u32 reg, mpc;
4581 u16 phy_tmp;
4582 int i;
4583 u64 bytes, packets;
4584 unsigned int start;
4585 u64 _bytes, _packets;
4587 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4590 * Prevent stats update while adapter is being reset, or if the pci
4591 * connection is down.
4593 if (adapter->link_speed == 0)
4594 return;
4595 if (pci_channel_offline(pdev))
4596 return;
4598 bytes = 0;
4599 packets = 0;
4600 for (i = 0; i < adapter->num_rx_queues; i++) {
4601 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4602 struct igb_ring *ring = adapter->rx_ring[i];
4604 ring->rx_stats.drops += rqdpc_tmp;
4605 net_stats->rx_fifo_errors += rqdpc_tmp;
4607 do {
4608 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4609 _bytes = ring->rx_stats.bytes;
4610 _packets = ring->rx_stats.packets;
4611 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4612 bytes += _bytes;
4613 packets += _packets;
4616 net_stats->rx_bytes = bytes;
4617 net_stats->rx_packets = packets;
4619 bytes = 0;
4620 packets = 0;
4621 for (i = 0; i < adapter->num_tx_queues; i++) {
4622 struct igb_ring *ring = adapter->tx_ring[i];
4623 do {
4624 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4625 _bytes = ring->tx_stats.bytes;
4626 _packets = ring->tx_stats.packets;
4627 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4628 bytes += _bytes;
4629 packets += _packets;
4631 net_stats->tx_bytes = bytes;
4632 net_stats->tx_packets = packets;
4634 /* read stats registers */
4635 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4636 adapter->stats.gprc += rd32(E1000_GPRC);
4637 adapter->stats.gorc += rd32(E1000_GORCL);
4638 rd32(E1000_GORCH); /* clear GORCL */
4639 adapter->stats.bprc += rd32(E1000_BPRC);
4640 adapter->stats.mprc += rd32(E1000_MPRC);
4641 adapter->stats.roc += rd32(E1000_ROC);
4643 adapter->stats.prc64 += rd32(E1000_PRC64);
4644 adapter->stats.prc127 += rd32(E1000_PRC127);
4645 adapter->stats.prc255 += rd32(E1000_PRC255);
4646 adapter->stats.prc511 += rd32(E1000_PRC511);
4647 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4648 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4649 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4650 adapter->stats.sec += rd32(E1000_SEC);
4652 mpc = rd32(E1000_MPC);
4653 adapter->stats.mpc += mpc;
4654 net_stats->rx_fifo_errors += mpc;
4655 adapter->stats.scc += rd32(E1000_SCC);
4656 adapter->stats.ecol += rd32(E1000_ECOL);
4657 adapter->stats.mcc += rd32(E1000_MCC);
4658 adapter->stats.latecol += rd32(E1000_LATECOL);
4659 adapter->stats.dc += rd32(E1000_DC);
4660 adapter->stats.rlec += rd32(E1000_RLEC);
4661 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4662 adapter->stats.xontxc += rd32(E1000_XONTXC);
4663 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4664 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4665 adapter->stats.fcruc += rd32(E1000_FCRUC);
4666 adapter->stats.gptc += rd32(E1000_GPTC);
4667 adapter->stats.gotc += rd32(E1000_GOTCL);
4668 rd32(E1000_GOTCH); /* clear GOTCL */
4669 adapter->stats.rnbc += rd32(E1000_RNBC);
4670 adapter->stats.ruc += rd32(E1000_RUC);
4671 adapter->stats.rfc += rd32(E1000_RFC);
4672 adapter->stats.rjc += rd32(E1000_RJC);
4673 adapter->stats.tor += rd32(E1000_TORH);
4674 adapter->stats.tot += rd32(E1000_TOTH);
4675 adapter->stats.tpr += rd32(E1000_TPR);
4677 adapter->stats.ptc64 += rd32(E1000_PTC64);
4678 adapter->stats.ptc127 += rd32(E1000_PTC127);
4679 adapter->stats.ptc255 += rd32(E1000_PTC255);
4680 adapter->stats.ptc511 += rd32(E1000_PTC511);
4681 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4682 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4684 adapter->stats.mptc += rd32(E1000_MPTC);
4685 adapter->stats.bptc += rd32(E1000_BPTC);
4687 adapter->stats.tpt += rd32(E1000_TPT);
4688 adapter->stats.colc += rd32(E1000_COLC);
4690 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4691 /* read internal phy specific stats */
4692 reg = rd32(E1000_CTRL_EXT);
4693 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4694 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4695 adapter->stats.tncrs += rd32(E1000_TNCRS);
4698 adapter->stats.tsctc += rd32(E1000_TSCTC);
4699 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4701 adapter->stats.iac += rd32(E1000_IAC);
4702 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4703 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4704 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4705 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4706 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4707 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4708 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4709 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4711 /* Fill out the OS statistics structure */
4712 net_stats->multicast = adapter->stats.mprc;
4713 net_stats->collisions = adapter->stats.colc;
4715 /* Rx Errors */
4717 /* RLEC on some newer hardware can be incorrect, so build
4718 * our own version based on RUC and ROC */
4719 net_stats->rx_errors = adapter->stats.rxerrc +
4720 adapter->stats.crcerrs + adapter->stats.algnerrc +
4721 adapter->stats.ruc + adapter->stats.roc +
4722 adapter->stats.cexterr;
4723 net_stats->rx_length_errors = adapter->stats.ruc +
4724 adapter->stats.roc;
4725 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4726 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4727 net_stats->rx_missed_errors = adapter->stats.mpc;
4729 /* Tx Errors */
4730 net_stats->tx_errors = adapter->stats.ecol +
4731 adapter->stats.latecol;
4732 net_stats->tx_aborted_errors = adapter->stats.ecol;
4733 net_stats->tx_window_errors = adapter->stats.latecol;
4734 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4736 /* Tx Dropped needs to be maintained elsewhere */
4738 /* Phy Stats */
4739 if (hw->phy.media_type == e1000_media_type_copper) {
4740 if ((adapter->link_speed == SPEED_1000) &&
4741 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4742 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4743 adapter->phy_stats.idle_errors += phy_tmp;
4747 /* Management Stats */
4748 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4749 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4750 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4752 /* OS2BMC Stats */
4753 reg = rd32(E1000_MANC);
4754 if (reg & E1000_MANC_EN_BMC2OS) {
4755 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4756 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4757 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4758 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4762 static irqreturn_t igb_msix_other(int irq, void *data)
4764 struct igb_adapter *adapter = data;
4765 struct e1000_hw *hw = &adapter->hw;
4766 u32 icr = rd32(E1000_ICR);
4767 /* reading ICR causes bit 31 of EICR to be cleared */
4769 if (icr & E1000_ICR_DRSTA)
4770 schedule_work(&adapter->reset_task);
4772 if (icr & E1000_ICR_DOUTSYNC) {
4773 /* HW is reporting DMA is out of sync */
4774 adapter->stats.doosync++;
4775 /* The DMA Out of Sync is also an indication of a spoof event
4776 * in IOV mode. Check the Wrong VM Behavior register to
4777 * see if it is really a spoof event. */
4778 igb_check_wvbr(adapter);
4781 /* Check for a mailbox event */
4782 if (icr & E1000_ICR_VMMB)
4783 igb_msg_task(adapter);
4785 if (icr & E1000_ICR_LSC) {
4786 hw->mac.get_link_status = 1;
4787 /* guard against interrupt when we're going down */
4788 if (!test_bit(__IGB_DOWN, &adapter->state))
4789 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4792 wr32(E1000_EIMS, adapter->eims_other);
4794 return IRQ_HANDLED;
4797 static void igb_write_itr(struct igb_q_vector *q_vector)
4799 struct igb_adapter *adapter = q_vector->adapter;
4800 u32 itr_val = q_vector->itr_val & 0x7FFC;
4802 if (!q_vector->set_itr)
4803 return;
4805 if (!itr_val)
4806 itr_val = 0x4;
4808 if (adapter->hw.mac.type == e1000_82575)
4809 itr_val |= itr_val << 16;
4810 else
4811 itr_val |= E1000_EITR_CNT_IGNR;
4813 writel(itr_val, q_vector->itr_register);
4814 q_vector->set_itr = 0;
4817 static irqreturn_t igb_msix_ring(int irq, void *data)
4819 struct igb_q_vector *q_vector = data;
4821 /* Write the ITR value calculated from the previous interrupt. */
4822 igb_write_itr(q_vector);
4824 napi_schedule(&q_vector->napi);
4826 return IRQ_HANDLED;
4829 #ifdef CONFIG_IGB_DCA
4830 static void igb_update_dca(struct igb_q_vector *q_vector)
4832 struct igb_adapter *adapter = q_vector->adapter;
4833 struct e1000_hw *hw = &adapter->hw;
4834 int cpu = get_cpu();
4836 if (q_vector->cpu == cpu)
4837 goto out_no_update;
4839 if (q_vector->tx.ring) {
4840 int q = q_vector->tx.ring->reg_idx;
4841 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4842 if (hw->mac.type == e1000_82575) {
4843 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4844 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4845 } else {
4846 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4847 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4848 E1000_DCA_TXCTRL_CPUID_SHIFT;
4850 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4851 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4853 if (q_vector->rx.ring) {
4854 int q = q_vector->rx.ring->reg_idx;
4855 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4856 if (hw->mac.type == e1000_82575) {
4857 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4858 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4859 } else {
4860 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4861 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4862 E1000_DCA_RXCTRL_CPUID_SHIFT;
4864 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4865 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4866 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4867 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4869 q_vector->cpu = cpu;
4870 out_no_update:
4871 put_cpu();
4874 static void igb_setup_dca(struct igb_adapter *adapter)
4876 struct e1000_hw *hw = &adapter->hw;
4877 int i;
4879 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4880 return;
4882 /* Always use CB2 mode, difference is masked in the CB driver. */
4883 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4885 for (i = 0; i < adapter->num_q_vectors; i++) {
4886 adapter->q_vector[i]->cpu = -1;
4887 igb_update_dca(adapter->q_vector[i]);
4891 static int __igb_notify_dca(struct device *dev, void *data)
4893 struct net_device *netdev = dev_get_drvdata(dev);
4894 struct igb_adapter *adapter = netdev_priv(netdev);
4895 struct pci_dev *pdev = adapter->pdev;
4896 struct e1000_hw *hw = &adapter->hw;
4897 unsigned long event = *(unsigned long *)data;
4899 switch (event) {
4900 case DCA_PROVIDER_ADD:
4901 /* if already enabled, don't do it again */
4902 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4903 break;
4904 if (dca_add_requester(dev) == 0) {
4905 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4906 dev_info(&pdev->dev, "DCA enabled\n");
4907 igb_setup_dca(adapter);
4908 break;
4910 /* Fall Through since DCA is disabled. */
4911 case DCA_PROVIDER_REMOVE:
4912 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4913 /* without this a class_device is left
4914 * hanging around in the sysfs model */
4915 dca_remove_requester(dev);
4916 dev_info(&pdev->dev, "DCA disabled\n");
4917 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4918 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4920 break;
4923 return 0;
4926 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4927 void *p)
4929 int ret_val;
4931 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4932 __igb_notify_dca);
4934 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4936 #endif /* CONFIG_IGB_DCA */
4938 #ifdef CONFIG_PCI_IOV
4939 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4941 unsigned char mac_addr[ETH_ALEN];
4942 struct pci_dev *pdev = adapter->pdev;
4943 struct e1000_hw *hw = &adapter->hw;
4944 struct pci_dev *pvfdev;
4945 unsigned int device_id;
4946 u16 thisvf_devfn;
4948 random_ether_addr(mac_addr);
4949 igb_set_vf_mac(adapter, vf, mac_addr);
4951 switch (adapter->hw.mac.type) {
4952 case e1000_82576:
4953 device_id = IGB_82576_VF_DEV_ID;
4954 /* VF Stride for 82576 is 2 */
4955 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4956 (pdev->devfn & 1);
4957 break;
4958 case e1000_i350:
4959 device_id = IGB_I350_VF_DEV_ID;
4960 /* VF Stride for I350 is 4 */
4961 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4962 (pdev->devfn & 3);
4963 break;
4964 default:
4965 device_id = 0;
4966 thisvf_devfn = 0;
4967 break;
4970 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4971 while (pvfdev) {
4972 if (pvfdev->devfn == thisvf_devfn)
4973 break;
4974 pvfdev = pci_get_device(hw->vendor_id,
4975 device_id, pvfdev);
4978 if (pvfdev)
4979 adapter->vf_data[vf].vfdev = pvfdev;
4980 else
4981 dev_err(&pdev->dev,
4982 "Couldn't find pci dev ptr for VF %4.4x\n",
4983 thisvf_devfn);
4984 return pvfdev != NULL;
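/* The devfn arithmetic above mirrors the fixed SR-IOV offset and
 * stride used by these parts.  For example, with the PF at devfn 0
 * on an 82576, VF 3 is expected at
 * (0 + 0x80 + (3 << 1)) | (0 & 1) = 0x86, while on an i350, which
 * strides by 4, the same VF lands at
 * (0 + 0x80 + (3 << 2)) | (0 & 3) = 0x8c.
 */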
4987 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4989 struct e1000_hw *hw = &adapter->hw;
4990 struct pci_dev *pdev = adapter->pdev;
4991 struct pci_dev *pvfdev;
4992 u16 vf_devfn = 0;
4993 u16 vf_stride;
4994 unsigned int device_id;
4995 int vfs_found = 0;
4997 switch (adapter->hw.mac.type) {
4998 case e1000_82576:
4999 device_id = IGB_82576_VF_DEV_ID;
5000 /* VF Stride for 82576 is 2 */
5001 vf_stride = 2;
5002 break;
5003 case e1000_i350:
5004 device_id = IGB_I350_VF_DEV_ID;
5005 /* VF Stride for I350 is 4 */
5006 vf_stride = 4;
5007 break;
5008 default:
5009 device_id = 0;
5010 vf_stride = 0;
5011 break;
5014 vf_devfn = pdev->devfn + 0x80;
5015 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5016 while (pvfdev) {
5017 if (pvfdev->devfn == vf_devfn)
5018 vfs_found++;
5019 vf_devfn += vf_stride;
5020 pvfdev = pci_get_device(hw->vendor_id,
5021 device_id, pvfdev);
5024 return vfs_found;
5027 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5029 int i;
5030 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5031 if (adapter->vf_data[i].vfdev) {
5032 if (adapter->vf_data[i].vfdev->dev_flags &
5033 PCI_DEV_FLAGS_ASSIGNED)
5034 return true;
5037 return false;
5040 #endif
5041 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5043 struct e1000_hw *hw = &adapter->hw;
5044 u32 ping;
5045 int i;
5047 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5048 ping = E1000_PF_CONTROL_MSG;
5049 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5050 ping |= E1000_VT_MSGTYPE_CTS;
5051 igb_write_mbx(hw, &ping, 1, i);
5055 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5057 struct e1000_hw *hw = &adapter->hw;
5058 u32 vmolr = rd32(E1000_VMOLR(vf));
5059 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5061 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5062 IGB_VF_FLAG_MULTI_PROMISC);
5063 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5065 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5066 vmolr |= E1000_VMOLR_MPME;
5067 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5068 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5069 } else {
5071 * if we have hashes and we are clearing a multicast promisc
5072 * flag we need to write the hashes to the MTA as this step
5073 * was previously skipped
5075 if (vf_data->num_vf_mc_hashes > 30) {
5076 vmolr |= E1000_VMOLR_MPME;
5077 } else if (vf_data->num_vf_mc_hashes) {
5078 int j;
5079 vmolr |= E1000_VMOLR_ROMPE;
5080 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5081 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5085 wr32(E1000_VMOLR(vf), vmolr);
5087 /* there are flags left unprocessed, likely not supported */
5088 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5089 return -EINVAL;
5091 return 0;
5095 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5096 u32 *msgbuf, u32 vf)
5098 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5099 u16 *hash_list = (u16 *)&msgbuf[1];
5100 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5101 int i;
5103 /* salt away the number of multicast addresses assigned
5104 * to this VF for later use to restore when the PF multicast
5105 * list changes
5107 vf_data->num_vf_mc_hashes = n;
5109 /* only up to 30 hash values supported */
5110 if (n > 30)
5111 n = 30;
5113 /* store the hashes for later use */
5114 for (i = 0; i < n; i++)
5115 vf_data->vf_mc_hashes[i] = hash_list[i];
5117 /* Flush and reset the mta with the new values */
5118 igb_set_rx_mode(adapter->netdev);
5120 return 0;
5123 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5125 struct e1000_hw *hw = &adapter->hw;
5126 struct vf_data_storage *vf_data;
5127 int i, j;
5129 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5130 u32 vmolr = rd32(E1000_VMOLR(i));
5131 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5133 vf_data = &adapter->vf_data[i];
5135 if ((vf_data->num_vf_mc_hashes > 30) ||
5136 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5137 vmolr |= E1000_VMOLR_MPME;
5138 } else if (vf_data->num_vf_mc_hashes) {
5139 vmolr |= E1000_VMOLR_ROMPE;
5140 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5141 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5143 wr32(E1000_VMOLR(i), vmolr);
5147 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5149 struct e1000_hw *hw = &adapter->hw;
5150 u32 pool_mask, reg, vid;
5151 int i;
5153 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5155 /* Find the vlan filter for this id */
5156 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5157 reg = rd32(E1000_VLVF(i));
5159 /* remove the vf from the pool */
5160 reg &= ~pool_mask;
5162 /* if pool is empty then remove entry from vfta */
5163 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5164 (reg & E1000_VLVF_VLANID_ENABLE)) {
5165 vid = reg & E1000_VLVF_VLANID_MASK;
5166 reg = 0;
5167 igb_vfta_set(hw, vid, false);
5170 wr32(E1000_VLVF(i), reg);
5173 adapter->vf_data[vf].vlans_enabled = 0;
5176 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5178 struct e1000_hw *hw = &adapter->hw;
5179 u32 reg, i;
5181 /* The vlvf table only exists on 82576 hardware and newer */
5182 if (hw->mac.type < e1000_82576)
5183 return -1;
5185 /* we only need to do this if VMDq is enabled */
5186 if (!adapter->vfs_allocated_count)
5187 return -1;
5189 /* Find the vlan filter for this id */
5190 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5191 reg = rd32(E1000_VLVF(i));
5192 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5193 vid == (reg & E1000_VLVF_VLANID_MASK))
5194 break;
5197 if (add) {
5198 if (i == E1000_VLVF_ARRAY_SIZE) {
5199 /* Did not find a matching VLAN ID entry that was
5200 * enabled. Search for a free filter entry, i.e.
5201 * one without the enable bit set
5203 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5204 reg = rd32(E1000_VLVF(i));
5205 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5206 break;
5209 if (i < E1000_VLVF_ARRAY_SIZE) {
5210 /* Found an enabled/available entry */
5211 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5213 /* if !enabled we need to set this up in vfta */
5214 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5215 /* add VID to filter table */
5216 igb_vfta_set(hw, vid, true);
5217 reg |= E1000_VLVF_VLANID_ENABLE;
5219 reg &= ~E1000_VLVF_VLANID_MASK;
5220 reg |= vid;
5221 wr32(E1000_VLVF(i), reg);
5223 /* do not modify RLPML for PF devices */
5224 if (vf >= adapter->vfs_allocated_count)
5225 return 0;
5227 if (!adapter->vf_data[vf].vlans_enabled) {
5228 u32 size;
5229 reg = rd32(E1000_VMOLR(vf));
5230 size = reg & E1000_VMOLR_RLPML_MASK;
5231 size += 4;
5232 reg &= ~E1000_VMOLR_RLPML_MASK;
5233 reg |= size;
5234 wr32(E1000_VMOLR(vf), reg);
5237 adapter->vf_data[vf].vlans_enabled++;
5239 } else {
5240 if (i < E1000_VLVF_ARRAY_SIZE) {
5241 /* remove vf from the pool */
5242 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5243 /* if pool is empty then remove entry from vfta */
5244 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5245 reg = 0;
5246 igb_vfta_set(hw, vid, false);
5248 wr32(E1000_VLVF(i), reg);
5250 /* do not modify RLPML for PF devices */
5251 if (vf >= adapter->vfs_allocated_count)
5252 return 0;
5254 adapter->vf_data[vf].vlans_enabled--;
5255 if (!adapter->vf_data[vf].vlans_enabled) {
5256 u32 size;
5257 reg = rd32(E1000_VMOLR(vf));
5258 size = reg & E1000_VMOLR_RLPML_MASK;
5259 size -= 4;
5260 reg &= ~E1000_VMOLR_RLPML_MASK;
5261 reg |= size;
5262 wr32(E1000_VMOLR(vf), reg);
5266 return 0;
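/* Note on the RLPML updates above: when a VF gains its first VLAN
 * filter, the per-VF maximum receive packet length in VMOLR is grown
 * by 4 bytes to leave room for the 802.1Q tag, and it is shrunk by
 * the same 4 bytes once the last VLAN for that VF is removed, keeping
 * vlans_enabled and the hardware limit in step.
 */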
5269 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5271 struct e1000_hw *hw = &adapter->hw;
5273 if (vid)
5274 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5275 else
5276 wr32(E1000_VMVIR(vf), 0);
5279 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5280 int vf, u16 vlan, u8 qos)
5282 int err = 0;
5283 struct igb_adapter *adapter = netdev_priv(netdev);
5285 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5286 return -EINVAL;
5287 if (vlan || qos) {
5288 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5289 if (err)
5290 goto out;
5291 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5292 igb_set_vmolr(adapter, vf, !vlan);
5293 adapter->vf_data[vf].pf_vlan = vlan;
5294 adapter->vf_data[vf].pf_qos = qos;
5295 dev_info(&adapter->pdev->dev,
5296 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5297 if (test_bit(__IGB_DOWN, &adapter->state)) {
5298 dev_warn(&adapter->pdev->dev,
5299 "The VF VLAN has been set,"
5300 " but the PF device is not up.\n");
5301 dev_warn(&adapter->pdev->dev,
5302 "Bring the PF device up before"
5303 " attempting to use the VF device.\n");
5305 } else {
5306 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5307 false, vf);
5308 igb_set_vmvir(adapter, vlan, vf);
5309 igb_set_vmolr(adapter, vf, true);
5310 adapter->vf_data[vf].pf_vlan = 0;
5311 adapter->vf_data[vf].pf_qos = 0;
5313 out:
5314 return err;
5317 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5319 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5320 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5322 return igb_vlvf_set(adapter, vid, add, vf);
5325 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5327 /* clear flags - except flag that indicates PF has set the MAC */
5328 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5329 adapter->vf_data[vf].last_nack = jiffies;
5331 /* reset offloads to defaults */
5332 igb_set_vmolr(adapter, vf, true);
5334 /* reset vlans for device */
5335 igb_clear_vf_vfta(adapter, vf);
5336 if (adapter->vf_data[vf].pf_vlan)
5337 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5338 adapter->vf_data[vf].pf_vlan,
5339 adapter->vf_data[vf].pf_qos);
5340 else
5341 igb_clear_vf_vfta(adapter, vf);
5343 /* reset multicast table array for vf */
5344 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5346 /* Flush and reset the mta with the new values */
5347 igb_set_rx_mode(adapter->netdev);
5350 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5352 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5354 /* generate a new mac address as we were hotplug removed/added */
5355 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5356 random_ether_addr(vf_mac);
5358 /* process remaining reset events */
5359 igb_vf_reset(adapter, vf);
5362 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5364 struct e1000_hw *hw = &adapter->hw;
5365 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5366 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5367 u32 reg, msgbuf[3];
5368 u8 *addr = (u8 *)(&msgbuf[1]);
5370 /* process all the same items cleared in a function level reset */
5371 igb_vf_reset(adapter, vf);
5373 /* set vf mac address */
5374 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5376 /* enable transmit and receive for vf */
5377 reg = rd32(E1000_VFTE);
5378 wr32(E1000_VFTE, reg | (1 << vf));
5379 reg = rd32(E1000_VFRE);
5380 wr32(E1000_VFRE, reg | (1 << vf));
5382 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5384 /* reply to reset with ack and vf mac address */
5385 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5386 memcpy(addr, vf_mac, 6);
5387 igb_write_mbx(hw, msgbuf, 3, vf);
5390 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5393 * The VF MAC Address is stored in a packed array of bytes
5394 * starting at the second 32 bit word of the msg array
5396 unsigned char *addr = (char *)&msg[1];
5397 int err = -1;
5399 if (is_valid_ether_addr(addr))
5400 err = igb_set_vf_mac(adapter, vf, addr);
5402 return err;
5405 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5407 struct e1000_hw *hw = &adapter->hw;
5408 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5409 u32 msg = E1000_VT_MSGTYPE_NACK;
5411 /* if device isn't clear to send it shouldn't be reading either */
5412 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5413 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5414 igb_write_mbx(hw, &msg, 1, vf);
5415 vf_data->last_nack = jiffies;
5419 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5421 struct pci_dev *pdev = adapter->pdev;
5422 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5423 struct e1000_hw *hw = &adapter->hw;
5424 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5425 s32 retval;
5427 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5429 if (retval) {
5430 /* if receive failed revoke VF CTS stats and restart init */
5431 dev_err(&pdev->dev, "Error receiving message from VF\n");
5432 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5433 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5434 return;
5435 goto out;
5438 /* this is a message we already processed, do nothing */
5439 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5440 return;
5443 * until the vf completes a reset it should not be
5444 * allowed to start any configuration.
5447 if (msgbuf[0] == E1000_VF_RESET) {
5448 igb_vf_reset_msg(adapter, vf);
5449 return;
5452 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5453 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5454 return;
5455 retval = -1;
5456 goto out;
5459 switch ((msgbuf[0] & 0xFFFF)) {
5460 case E1000_VF_SET_MAC_ADDR:
5461 retval = -EINVAL;
5462 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5463 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5464 else
5465 dev_warn(&pdev->dev,
5466 "VF %d attempted to override administratively "
5467 "set MAC address\nReload the VF driver to "
5468 "resume operations\n", vf);
5469 break;
5470 case E1000_VF_SET_PROMISC:
5471 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5472 break;
5473 case E1000_VF_SET_MULTICAST:
5474 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5475 break;
5476 case E1000_VF_SET_LPE:
5477 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5478 break;
5479 case E1000_VF_SET_VLAN:
5480 retval = -1;
5481 if (vf_data->pf_vlan)
5482 dev_warn(&pdev->dev,
5483 "VF %d attempted to override administratively "
5484 "set VLAN tag\nReload the VF driver to "
5485 "resume operations\n", vf);
5486 else
5487 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5488 break;
5489 default:
5490 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5491 retval = -1;
5492 break;
5495 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5496 out:
5497 /* notify the VF of the results of what it sent us */
5498 if (retval)
5499 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5500 else
5501 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5503 igb_write_mbx(hw, msgbuf, 1, vf);
5506 static void igb_msg_task(struct igb_adapter *adapter)
5508 struct e1000_hw *hw = &adapter->hw;
5509 u32 vf;
5511 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5512 /* process any reset requests */
5513 if (!igb_check_for_rst(hw, vf))
5514 igb_vf_reset_event(adapter, vf);
5516 /* process any messages pending */
5517 if (!igb_check_for_msg(hw, vf))
5518 igb_rcv_msg_from_vf(adapter, vf);
5520 /* process any acks */
5521 if (!igb_check_for_ack(hw, vf))
5522 igb_rcv_ack_from_vf(adapter, vf);
5527 * igb_set_uta - Set unicast filter table address
5528 * @adapter: board private structure
5530 * The unicast table address is a register array of 32-bit registers.
5531 * The table is meant to be used in a way similar to how the MTA is used;
5532 * however, due to certain limitations in the hardware it is necessary to
5533 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5534 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5536 static void igb_set_uta(struct igb_adapter *adapter)
5538 struct e1000_hw *hw = &adapter->hw;
5539 int i;
5541 /* The UTA table only exists on 82576 hardware and newer */
5542 if (hw->mac.type < e1000_82576)
5543 return;
5545 /* we only need to do this if VMDq is enabled */
5546 if (!adapter->vfs_allocated_count)
5547 return;
5549 for (i = 0; i < hw->mac.uta_reg_count; i++)
5550 array_wr32(E1000_UTA, i, ~0);
5554 * igb_intr_msi - Interrupt Handler
5555 * @irq: interrupt number
5556 * @data: pointer to a network interface device structure
5558 static irqreturn_t igb_intr_msi(int irq, void *data)
5560 struct igb_adapter *adapter = data;
5561 struct igb_q_vector *q_vector = adapter->q_vector[0];
5562 struct e1000_hw *hw = &adapter->hw;
5563 /* read ICR disables interrupts using IAM */
5564 u32 icr = rd32(E1000_ICR);
5566 igb_write_itr(q_vector);
5568 if (icr & E1000_ICR_DRSTA)
5569 schedule_work(&adapter->reset_task);
5571 if (icr & E1000_ICR_DOUTSYNC) {
5572 /* HW is reporting DMA is out of sync */
5573 adapter->stats.doosync++;
5576 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5577 hw->mac.get_link_status = 1;
5578 if (!test_bit(__IGB_DOWN, &adapter->state))
5579 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5582 napi_schedule(&q_vector->napi);
5584 return IRQ_HANDLED;
5588 * igb_intr - Legacy Interrupt Handler
5589 * @irq: interrupt number
5590 * @data: pointer to a network interface device structure
5592 static irqreturn_t igb_intr(int irq, void *data)
5594 struct igb_adapter *adapter = data;
5595 struct igb_q_vector *q_vector = adapter->q_vector[0];
5596 struct e1000_hw *hw = &adapter->hw;
5597 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5598 * need for the IMC write */
5599 u32 icr = rd32(E1000_ICR);
5601 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5602 * not set, then the adapter didn't send an interrupt */
5603 if (!(icr & E1000_ICR_INT_ASSERTED))
5604 return IRQ_NONE;
5606 igb_write_itr(q_vector);
5608 if (icr & E1000_ICR_DRSTA)
5609 schedule_work(&adapter->reset_task);
5611 if (icr & E1000_ICR_DOUTSYNC) {
5612 /* HW is reporting DMA is out of sync */
5613 adapter->stats.doosync++;
5616 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5617 hw->mac.get_link_status = 1;
5618 /* guard against interrupt when we're going down */
5619 if (!test_bit(__IGB_DOWN, &adapter->state))
5620 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5623 napi_schedule(&q_vector->napi);
5625 return IRQ_HANDLED;
5628 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5630 struct igb_adapter *adapter = q_vector->adapter;
5631 struct e1000_hw *hw = &adapter->hw;
5633 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5634 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5635 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5636 igb_set_itr(q_vector);
5637 else
5638 igb_update_ring_itr(q_vector);
5641 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5642 if (adapter->msix_entries)
5643 wr32(E1000_EIMS, q_vector->eims_value);
5644 else
5645 igb_irq_enable(adapter);
5650 * igb_poll - NAPI Rx polling callback
5651 * @napi: napi polling structure
5652 * @budget: count of how many packets we should handle
5654 static int igb_poll(struct napi_struct *napi, int budget)
5656 struct igb_q_vector *q_vector = container_of(napi,
5657 struct igb_q_vector,
5658 napi);
5659 bool clean_complete = true;
5661 #ifdef CONFIG_IGB_DCA
5662 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5663 igb_update_dca(q_vector);
5664 #endif
5665 if (q_vector->tx.ring)
5666 clean_complete = igb_clean_tx_irq(q_vector);
5668 if (q_vector->rx.ring)
5669 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5671 /* If all work not completed, return budget and keep polling */
5672 if (!clean_complete)
5673 return budget;
5675 /* If not enough Rx work done, exit the polling mode */
5676 napi_complete(napi);
5677 igb_ring_irq_enable(q_vector);
5679 return 0;
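/* NAPI contract as used above: returning the full budget keeps this
 * q_vector in polling mode because clean_complete went false, while
 * returning 0 after napi_complete() hands the vector back to
 * interrupt mode via igb_ring_irq_enable(), which re-arms the vector
 * through EIMS under MSI-X or calls igb_irq_enable() otherwise.
 */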
5683 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5684 * @adapter: board private structure
5685 * @shhwtstamps: timestamp structure to update
5686 * @regval: unsigned 64bit system time value.
5688 * We need to convert the system time value stored in the RX/TXSTMP registers
5689 * into a hwtstamp which can be used by the upper level timestamping functions
5691 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5692 struct skb_shared_hwtstamps *shhwtstamps,
5693 u64 regval)
5695 u64 ns;
5698 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5699 * 24 to match the clock shift we set up earlier.
5701 if (adapter->hw.mac.type >= e1000_82580)
5702 regval <<= IGB_82580_TSYNC_SHIFT;
5704 ns = timecounter_cyc2time(&adapter->clock, regval);
5705 timecompare_update(&adapter->compare, ns);
5706 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5707 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5708 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5712 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5713 * @q_vector: pointer to q_vector containing needed info
5714 * @buffer: pointer to igb_tx_buffer structure
5716 * If we were asked to do hardware stamping and such a time stamp is
5717 * available, then it must have been for this skb here because we
5718 * allow only one such packet into the queue.
5720 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5721 struct igb_tx_buffer *buffer_info)
5723 struct igb_adapter *adapter = q_vector->adapter;
5724 struct e1000_hw *hw = &adapter->hw;
5725 struct skb_shared_hwtstamps shhwtstamps;
5726 u64 regval;
5728 /* if skb does not support hw timestamp or TX stamp not valid exit */
5729 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5730 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5731 return;
5733 regval = rd32(E1000_TXSTMPL);
5734 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5736 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5737 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5741 * igb_clean_tx_irq - Reclaim resources after transmit completes
5742 * @q_vector: pointer to q_vector containing needed info
5743 * returns true if ring is completely cleaned
5745 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5747 struct igb_adapter *adapter = q_vector->adapter;
5748 struct igb_ring *tx_ring = q_vector->tx.ring;
5749 struct igb_tx_buffer *tx_buffer;
5750 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5751 unsigned int total_bytes = 0, total_packets = 0;
5752 unsigned int budget = q_vector->tx.work_limit;
5753 unsigned int i = tx_ring->next_to_clean;
5755 if (test_bit(__IGB_DOWN, &adapter->state))
5756 return true;
5758 tx_buffer = &tx_ring->tx_buffer_info[i];
5759 tx_desc = IGB_TX_DESC(tx_ring, i);
5760 i -= tx_ring->count;
5762 for (; budget; budget--) {
5763 eop_desc = tx_buffer->next_to_watch;
5765 /* prevent any other reads prior to eop_desc */
5766 rmb();
5768 /* if next_to_watch is not set then there is no work pending */
5769 if (!eop_desc)
5770 break;
5772 /* if DD is not set pending work has not been completed */
5773 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5774 break;
5776 /* clear next_to_watch to prevent false hangs */
5777 tx_buffer->next_to_watch = NULL;
5779 /* update the statistics for this packet */
5780 total_bytes += tx_buffer->bytecount;
5781 total_packets += tx_buffer->gso_segs;
5783 /* retrieve hardware timestamp */
5784 igb_tx_hwtstamp(q_vector, tx_buffer);
5786 /* free the skb */
5787 dev_kfree_skb_any(tx_buffer->skb);
5788 tx_buffer->skb = NULL;
5790 /* unmap skb header data */
5791 dma_unmap_single(tx_ring->dev,
5792 tx_buffer->dma,
5793 tx_buffer->length,
5794 DMA_TO_DEVICE);
5796 /* clear last DMA location and unmap remaining buffers */
5797 while (tx_desc != eop_desc) {
5798 tx_buffer->dma = 0;
5800 tx_buffer++;
5801 tx_desc++;
5802 i++;
5803 if (unlikely(!i)) {
5804 i -= tx_ring->count;
5805 tx_buffer = tx_ring->tx_buffer_info;
5806 tx_desc = IGB_TX_DESC(tx_ring, 0);
5809 /* unmap any remaining paged data */
5810 if (tx_buffer->dma) {
5811 dma_unmap_page(tx_ring->dev,
5812 tx_buffer->dma,
5813 tx_buffer->length,
5814 DMA_TO_DEVICE);
5818 /* clear last DMA location */
5819 tx_buffer->dma = 0;
5821 /* move us one more past the eop_desc for start of next pkt */
5822 tx_buffer++;
5823 tx_desc++;
5824 i++;
5825 if (unlikely(!i)) {
5826 i -= tx_ring->count;
5827 tx_buffer = tx_ring->tx_buffer_info;
5828 tx_desc = IGB_TX_DESC(tx_ring, 0);
5832 netdev_tx_completed_queue(txring_txq(tx_ring),
5833 total_packets, total_bytes);
5834 i += tx_ring->count;
5835 tx_ring->next_to_clean = i;
5836 u64_stats_update_begin(&tx_ring->tx_syncp);
5837 tx_ring->tx_stats.bytes += total_bytes;
5838 tx_ring->tx_stats.packets += total_packets;
5839 u64_stats_update_end(&tx_ring->tx_syncp);
5840 q_vector->tx.total_bytes += total_bytes;
5841 q_vector->tx.total_packets += total_packets;
5843 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5844 struct e1000_hw *hw = &adapter->hw;
5846 eop_desc = tx_buffer->next_to_watch;
5848 /* Detect a transmit hang in hardware, this serializes the
5849 * check with the clearing of time_stamp and movement of i */
5850 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5851 if (eop_desc &&
5852 time_after(jiffies, tx_buffer->time_stamp +
5853 (adapter->tx_timeout_factor * HZ)) &&
5854 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5856 /* detected Tx unit hang */
5857 dev_err(tx_ring->dev,
5858 "Detected Tx Unit Hang\n"
5859 " Tx Queue <%d>\n"
5860 " TDH <%x>\n"
5861 " TDT <%x>\n"
5862 " next_to_use <%x>\n"
5863 " next_to_clean <%x>\n"
5864 "buffer_info[next_to_clean]\n"
5865 " time_stamp <%lx>\n"
5866 " next_to_watch <%p>\n"
5867 " jiffies <%lx>\n"
5868 " desc.status <%x>\n",
5869 tx_ring->queue_index,
5870 rd32(E1000_TDH(tx_ring->reg_idx)),
5871 readl(tx_ring->tail),
5872 tx_ring->next_to_use,
5873 tx_ring->next_to_clean,
5874 tx_buffer->time_stamp,
5875 eop_desc,
5876 jiffies,
5877 eop_desc->wb.status);
5878 netif_stop_subqueue(tx_ring->netdev,
5879 tx_ring->queue_index);
5881 /* we are about to reset, no point in enabling stuff */
5882 return true;
5886 if (unlikely(total_packets &&
5887 netif_carrier_ok(tx_ring->netdev) &&
5888 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5889 /* Make sure that anybody stopping the queue after this
5890 * sees the new next_to_clean.
5892 smp_mb();
5893 if (__netif_subqueue_stopped(tx_ring->netdev,
5894 tx_ring->queue_index) &&
5895 !(test_bit(__IGB_DOWN, &adapter->state))) {
5896 netif_wake_subqueue(tx_ring->netdev,
5897 tx_ring->queue_index);
5899 u64_stats_update_begin(&tx_ring->tx_syncp);
5900 tx_ring->tx_stats.restart_queue++;
5901 u64_stats_update_end(&tx_ring->tx_syncp);
5905 return !!budget;
5908 static inline void igb_rx_checksum(struct igb_ring *ring,
5909 union e1000_adv_rx_desc *rx_desc,
5910 struct sk_buff *skb)
5912 skb_checksum_none_assert(skb);
5914 /* Ignore Checksum bit is set */
5915 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5916 return;
5918 /* Rx checksum disabled via ethtool */
5919 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5920 return;
5922 /* TCP/UDP checksum error bit is set */
5923 if (igb_test_staterr(rx_desc,
5924 E1000_RXDEXT_STATERR_TCPE |
5925 E1000_RXDEXT_STATERR_IPE)) {
5927 * work around errata with sctp packets where the TCPE aka
5928 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5929 * packets; in that case let the stack verify the crc32c
5931 if (!((skb->len == 60) &&
5932 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5933 u64_stats_update_begin(&ring->rx_syncp);
5934 ring->rx_stats.csum_err++;
5935 u64_stats_update_end(&ring->rx_syncp);
5937 /* let the stack verify checksum errors */
5938 return;
5940 /* It must be a TCP or UDP packet with a valid checksum */
5941 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5942 E1000_RXD_STAT_UDPCS))
5943 skb->ip_summed = CHECKSUM_UNNECESSARY;
5945 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5946 le32_to_cpu(rx_desc->wb.upper.status_error));
5949 static inline void igb_rx_hash(struct igb_ring *ring,
5950 union e1000_adv_rx_desc *rx_desc,
5951 struct sk_buff *skb)
5953 if (ring->netdev->features & NETIF_F_RXHASH)
5954 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5957 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5958 union e1000_adv_rx_desc *rx_desc,
5959 struct sk_buff *skb)
5961 struct igb_adapter *adapter = q_vector->adapter;
5962 struct e1000_hw *hw = &adapter->hw;
5963 u64 regval;
5965 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5966 E1000_RXDADV_STAT_TS))
5967 return;
5970 * If this bit is set, then the RX registers contain the time stamp. No
5971 * other packet will be time stamped until we read these registers, so
5972 * read the registers to make them available again. Because only one
5973 * packet can be time stamped at a time, we know that the register
5974 * values must belong to this one here and therefore we don't need to
5975 * compare any of the additional attributes stored for it.
5977 * If nothing went wrong, then it should have a shared tx_flags that we
5978 * can turn into a skb_shared_hwtstamps.
5980 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5981 u32 *stamp = (u32 *)skb->data;
5982 regval = le32_to_cpu(*(stamp + 2));
5983 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5984 skb_pull(skb, IGB_TS_HDR_LEN);
5985 } else {
5986 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5987 return;
5989 regval = rd32(E1000_RXSTMPL);
5990 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5993 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5996 static void igb_rx_vlan(struct igb_ring *ring,
5997 union e1000_adv_rx_desc *rx_desc,
5998 struct sk_buff *skb)
6000 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6001 u16 vid;
6002 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6003 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6004 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6005 else
6006 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6008 __vlan_hwaccel_put_tag(skb, vid);
6012 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6014 /* HW will not DMA in data larger than the given buffer, even if it
6015 * parses the (NFS, of course) header to be larger. In that case, it
6016 * fills the header buffer and spills the rest into the page.
6018 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6019 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6020 if (hlen > IGB_RX_HDR_LEN)
6021 hlen = IGB_RX_HDR_LEN;
6022 return hlen;
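/* Illustrative restatement (not additional driver code) of the decode above:
 * the write-back descriptor packs the DMA'd header length into hdr_info, and
 * the helper simply masks, shifts and clamps it:
 *
 *	hlen = (le16_to_cpu(hdr_info) & E1000_RXDADV_HDRBUFLEN_MASK)
 *			>> E1000_RXDADV_HDRBUFLEN_SHIFT;
 *	hlen = min_t(u16, hlen, IGB_RX_HDR_LEN);   // never trust HW past our buffer
 *
 * The clamp matters because hardware may report a parsed header larger than
 * the header buffer it actually filled.
 */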
6025 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6027 struct igb_ring *rx_ring = q_vector->rx.ring;
6028 union e1000_adv_rx_desc *rx_desc;
6029 const int current_node = numa_node_id();
6030 unsigned int total_bytes = 0, total_packets = 0;
6031 u16 cleaned_count = igb_desc_unused(rx_ring);
6032 u16 i = rx_ring->next_to_clean;
6034 rx_desc = IGB_RX_DESC(rx_ring, i);
6036 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6037 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6038 struct sk_buff *skb = buffer_info->skb;
6039 union e1000_adv_rx_desc *next_rxd;
6041 buffer_info->skb = NULL;
6042 prefetch(skb->data);
6044 i++;
6045 if (i == rx_ring->count)
6046 i = 0;
6048 next_rxd = IGB_RX_DESC(rx_ring, i);
6049 prefetch(next_rxd);
6052 * This memory barrier is needed to keep us from reading
6053 * any other fields out of the rx_desc until we know the
6054 * RXD_STAT_DD bit is set
6056 rmb();
6058 if (!skb_is_nonlinear(skb)) {
6059 __skb_put(skb, igb_get_hlen(rx_desc));
6060 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6061 IGB_RX_HDR_LEN,
6062 DMA_FROM_DEVICE);
6063 buffer_info->dma = 0;
6066 if (rx_desc->wb.upper.length) {
6067 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6069 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6070 buffer_info->page,
6071 buffer_info->page_offset,
6072 length);
6074 skb->len += length;
6075 skb->data_len += length;
6076 skb->truesize += PAGE_SIZE / 2;
6078 if ((page_count(buffer_info->page) != 1) ||
6079 (page_to_nid(buffer_info->page) != current_node))
6080 buffer_info->page = NULL;
6081 else
6082 get_page(buffer_info->page);
6084 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6085 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6086 buffer_info->page_dma = 0;
6089 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6090 struct igb_rx_buffer *next_buffer;
6091 next_buffer = &rx_ring->rx_buffer_info[i];
6092 buffer_info->skb = next_buffer->skb;
6093 buffer_info->dma = next_buffer->dma;
6094 next_buffer->skb = skb;
6095 next_buffer->dma = 0;
6096 goto next_desc;
6099 if (igb_test_staterr(rx_desc,
6100 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6101 dev_kfree_skb_any(skb);
6102 goto next_desc;
6105 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6106 igb_rx_hash(rx_ring, rx_desc, skb);
6107 igb_rx_checksum(rx_ring, rx_desc, skb);
6108 igb_rx_vlan(rx_ring, rx_desc, skb);
6110 total_bytes += skb->len;
6111 total_packets++;
6113 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6115 napi_gro_receive(&q_vector->napi, skb);
6117 budget--;
6118 next_desc:
6119 if (!budget)
6120 break;
6122 cleaned_count++;
6123 /* return some buffers to hardware, one at a time is too slow */
6124 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6125 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6126 cleaned_count = 0;
6129 /* use prefetched values */
6130 rx_desc = next_rxd;
6133 rx_ring->next_to_clean = i;
6134 u64_stats_update_begin(&rx_ring->rx_syncp);
6135 rx_ring->rx_stats.packets += total_packets;
6136 rx_ring->rx_stats.bytes += total_bytes;
6137 u64_stats_update_end(&rx_ring->rx_syncp);
6138 q_vector->rx.total_packets += total_packets;
6139 q_vector->rx.total_bytes += total_bytes;
6141 if (cleaned_count)
6142 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6144 return !!budget;
6147 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6148 struct igb_rx_buffer *bi)
6150 struct sk_buff *skb = bi->skb;
6151 dma_addr_t dma = bi->dma;
6153 if (dma)
6154 return true;
6156 if (likely(!skb)) {
6157 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6158 IGB_RX_HDR_LEN);
6159 bi->skb = skb;
6160 if (!skb) {
6161 rx_ring->rx_stats.alloc_failed++;
6162 return false;
6165 /* initialize skb for ring */
6166 skb_record_rx_queue(skb, rx_ring->queue_index);
6169 dma = dma_map_single(rx_ring->dev, skb->data,
6170 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6172 if (dma_mapping_error(rx_ring->dev, dma)) {
6173 rx_ring->rx_stats.alloc_failed++;
6174 return false;
6177 bi->dma = dma;
6178 return true;
6181 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6182 struct igb_rx_buffer *bi)
6184 struct page *page = bi->page;
6185 dma_addr_t page_dma = bi->page_dma;
6186 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6188 if (page_dma)
6189 return true;
6191 if (!page) {
6192 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6193 bi->page = page;
6194 if (unlikely(!page)) {
6195 rx_ring->rx_stats.alloc_failed++;
6196 return false;
6200 page_dma = dma_map_page(rx_ring->dev, page,
6201 page_offset, PAGE_SIZE / 2,
6202 DMA_FROM_DEVICE);
6204 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6205 rx_ring->rx_stats.alloc_failed++;
6206 return false;
6209 bi->page_dma = page_dma;
6210 bi->page_offset = page_offset;
6211 return true;
6215 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6216 * @rx_ring: rx descriptor ring to refill
 * @cleaned_count: number of descriptors to refill with new buffers
6218 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6220 union e1000_adv_rx_desc *rx_desc;
6221 struct igb_rx_buffer *bi;
6222 u16 i = rx_ring->next_to_use;
6224 rx_desc = IGB_RX_DESC(rx_ring, i);
6225 bi = &rx_ring->rx_buffer_info[i];
6226 i -= rx_ring->count;
6228 while (cleaned_count--) {
6229 if (!igb_alloc_mapped_skb(rx_ring, bi))
6230 break;
6232 /* Refresh the desc even if buffer_addrs didn't change
6233 * because each write-back erases this info. */
6234 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6236 if (!igb_alloc_mapped_page(rx_ring, bi))
6237 break;
6239 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6241 rx_desc++;
6242 bi++;
6243 i++;
6244 if (unlikely(!i)) {
6245 rx_desc = IGB_RX_DESC(rx_ring, 0);
6246 bi = rx_ring->rx_buffer_info;
6247 i -= rx_ring->count;
6250 /* clear the hdr_addr for the next_to_use descriptor */
6251 rx_desc->read.hdr_addr = 0;
6254 i += rx_ring->count;
6256 if (rx_ring->next_to_use != i) {
6257 rx_ring->next_to_use = i;
6259 /* Force memory writes to complete before letting h/w
6260 * know there are new descriptors to fetch. (Only
6261 * applicable for weak-ordered memory model archs,
6262 * such as IA-64). */
6263 wmb();
6264 writel(i, rx_ring->tail);
6269 * igb_mii_ioctl - handle MII (PHY register) ioctls
6270 * @netdev: network interface device structure
6271 * @ifr: interface request structure carrying the mii_ioctl_data
6272 * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
6274 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6276 struct igb_adapter *adapter = netdev_priv(netdev);
6277 struct mii_ioctl_data *data = if_mii(ifr);
6279 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6280 return -EOPNOTSUPP;
6282 switch (cmd) {
6283 case SIOCGMIIPHY:
6284 data->phy_id = adapter->hw.phy.addr;
6285 break;
6286 case SIOCGMIIREG:
6287 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6288 &data->val_out))
6289 return -EIO;
6290 break;
6291 case SIOCSMIIREG:
6292 default:
6293 return -EOPNOTSUPP;
6295 return 0;
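/* Illustrative user-space sketch (not driver code) of how the MII ioctls
 * handled above are typically issued; the interface name "eth0" and the
 * missing error handling are assumptions for illustration only:
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <linux/mii.h>
 *	#include <linux/sockios.h>
 *
 *	struct ifreq ifr;
 *	// MII data overlays the ifreq union, matching the driver's if_mii()
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *	int sock = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(sock, SIOCGMIIPHY, &ifr);        // driver fills mii->phy_id
 *	mii->reg_num = MII_BMSR;               // PHY status register
 *	ioctl(sock, SIOCGMIIREG, &ifr);        // driver fills mii->val_out
 */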
6299 * igb_hwtstamp_ioctl - control hardware time stamping
6300 * @netdev: network interface device structure
6301 * @ifr: interface request structure carrying a hwtstamp_config
6302 * @cmd: ioctl command (SIOCSHWTSTAMP)
6304 * Outgoing time stamping can be enabled and disabled. Play nice and
6305 * disable it when requested, although it shouldn't cause any overhead
6306 * when no packet needs it. At most one packet in the queue may be
6307 * marked for time stamping, otherwise it would be impossible to tell
6308 * for sure to which packet the hardware time stamp belongs.
6310 * Incoming time stamping has to be configured via the hardware
6311 * filters. Not all combinations are supported, in particular event
6312 * type has to be specified. Matching the kind of event packet is
6313 * not supported, with the exception of "all V2 events regardless of
6314 * layer 2 or 4".
6317 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6318 struct ifreq *ifr, int cmd)
6320 struct igb_adapter *adapter = netdev_priv(netdev);
6321 struct e1000_hw *hw = &adapter->hw;
6322 struct hwtstamp_config config;
6323 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6324 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6325 u32 tsync_rx_cfg = 0;
6326 bool is_l4 = false;
6327 bool is_l2 = false;
6328 u32 regval;
6330 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6331 return -EFAULT;
6333 /* reserved for future extensions */
6334 if (config.flags)
6335 return -EINVAL;
6337 switch (config.tx_type) {
6338 case HWTSTAMP_TX_OFF:
6339 tsync_tx_ctl = 0;
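/* fall through - both OFF and ON are valid, only OFF clears the enable flag */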
6340 case HWTSTAMP_TX_ON:
6341 break;
6342 default:
6343 return -ERANGE;
6346 switch (config.rx_filter) {
6347 case HWTSTAMP_FILTER_NONE:
6348 tsync_rx_ctl = 0;
6349 break;
6350 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6351 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6352 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6353 case HWTSTAMP_FILTER_ALL:
6355 * register TSYNCRXCFG must be set, therefore it is not
6356 * possible to time stamp both Sync and Delay_Req messages
6357 * => fall back to time stamping all packets
6359 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6360 config.rx_filter = HWTSTAMP_FILTER_ALL;
6361 break;
6362 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6363 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6364 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6365 is_l4 = true;
6366 break;
6367 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6368 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6369 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6370 is_l4 = true;
6371 break;
6372 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6373 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6374 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6375 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6376 is_l2 = true;
6377 is_l4 = true;
6378 config.rx_filter = HWTSTAMP_FILTER_SOME;
6379 break;
6380 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6381 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6382 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6383 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6384 is_l2 = true;
6385 is_l4 = true;
6386 config.rx_filter = HWTSTAMP_FILTER_SOME;
6387 break;
6388 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6389 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6390 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6391 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6392 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6393 is_l2 = true;
6394 is_l4 = true;
6395 break;
6396 default:
6397 return -ERANGE;
6400 if (hw->mac.type == e1000_82575) {
6401 if (tsync_rx_ctl | tsync_tx_ctl)
6402 return -EINVAL;
6403 return 0;
6407 * Per-packet timestamping only works if all packets are
6408 * timestamped, so enable timestamping in all packets as
6409 * long as one rx filter was configured.
6411 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6412 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6413 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6416 /* enable/disable TX */
6417 regval = rd32(E1000_TSYNCTXCTL);
6418 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6419 regval |= tsync_tx_ctl;
6420 wr32(E1000_TSYNCTXCTL, regval);
6422 /* enable/disable RX */
6423 regval = rd32(E1000_TSYNCRXCTL);
6424 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6425 regval |= tsync_rx_ctl;
6426 wr32(E1000_TSYNCRXCTL, regval);
6428 /* define which PTP packets are time stamped */
6429 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6431 /* define ethertype filter for timestamped packets */
6432 if (is_l2)
6433 wr32(E1000_ETQF(3),
6434 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6435 E1000_ETQF_1588 | /* enable timestamping */
6436 ETH_P_1588)); /* 1588 eth protocol type */
6437 else
6438 wr32(E1000_ETQF(3), 0);
6440 #define PTP_PORT 319
6441 /* L4 Queue Filter[3]: filter by destination port and protocol */
6442 if (is_l4) {
6443 u32 ftqf = (IPPROTO_UDP /* UDP */
6444 | E1000_FTQF_VF_BP /* VF not compared */
6445 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6446 | E1000_FTQF_MASK); /* mask all inputs */
6447 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6449 wr32(E1000_IMIR(3), htons(PTP_PORT));
6450 wr32(E1000_IMIREXT(3),
6451 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6452 if (hw->mac.type == e1000_82576) {
6453 /* enable source port check */
6454 wr32(E1000_SPQF(3), htons(PTP_PORT));
6455 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6457 wr32(E1000_FTQF(3), ftqf);
6458 } else {
6459 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6461 wrfl();
6463 adapter->hwtstamp_config = config;
6465 /* clear TX/RX time stamp registers, just to be sure */
6466 regval = rd32(E1000_TXSTMPH);
6467 regval = rd32(E1000_RXSTMPH);
6469 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6470 -EFAULT : 0;
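/* Illustrative user-space sketch (not driver code) of the SIOCSHWTSTAMP
 * request serviced above; the interface name and the chosen filter are
 * assumptions for illustration only:
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <linux/net_tstamp.h>
 *	#include <linux/sockios.h>
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *	int sock = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (char *)&cfg;
 *	ioctl(sock, SIOCSHWTSTAMP, &ifr);
 *	// on return, cfg.rx_filter reports what the driver actually enabled,
 *	// which may be coarser than requested (e.g. HWTSTAMP_FILTER_ALL)
 */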
6474 * igb_ioctl - entry point for device-specific ioctls
6475 * @netdev: network interface device structure
6476 * @ifr: interface request structure
6477 * @cmd: ioctl command
6479 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6481 switch (cmd) {
6482 case SIOCGMIIPHY:
6483 case SIOCGMIIREG:
6484 case SIOCSMIIREG:
6485 return igb_mii_ioctl(netdev, ifr, cmd);
6486 case SIOCSHWTSTAMP:
6487 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6488 default:
6489 return -EOPNOTSUPP;
6493 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6495 struct igb_adapter *adapter = hw->back;
6496 u16 cap_offset;
6498 cap_offset = adapter->pdev->pcie_cap;
6499 if (!cap_offset)
6500 return -E1000_ERR_CONFIG;
6502 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6504 return 0;
6507 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6509 struct igb_adapter *adapter = hw->back;
6510 u16 cap_offset;
6512 cap_offset = adapter->pdev->pcie_cap;
6513 if (!cap_offset)
6514 return -E1000_ERR_CONFIG;
6516 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6518 return 0;
6521 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6523 struct igb_adapter *adapter = netdev_priv(netdev);
6524 struct e1000_hw *hw = &adapter->hw;
6525 u32 ctrl, rctl;
6526 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6528 if (enable) {
6529 /* enable VLAN tag insert/strip */
6530 ctrl = rd32(E1000_CTRL);
6531 ctrl |= E1000_CTRL_VME;
6532 wr32(E1000_CTRL, ctrl);
6534 /* Disable CFI check */
6535 rctl = rd32(E1000_RCTL);
6536 rctl &= ~E1000_RCTL_CFIEN;
6537 wr32(E1000_RCTL, rctl);
6538 } else {
6539 /* disable VLAN tag insert/strip */
6540 ctrl = rd32(E1000_CTRL);
6541 ctrl &= ~E1000_CTRL_VME;
6542 wr32(E1000_CTRL, ctrl);
6545 igb_rlpml_set(adapter);
6548 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6550 struct igb_adapter *adapter = netdev_priv(netdev);
6551 struct e1000_hw *hw = &adapter->hw;
6552 int pf_id = adapter->vfs_allocated_count;
6554 /* attempt to add filter to vlvf array */
6555 igb_vlvf_set(adapter, vid, true, pf_id);
6557 /* add the filter since PF can receive vlans w/o entry in vlvf */
6558 igb_vfta_set(hw, vid, true);
6560 set_bit(vid, adapter->active_vlans);
6562 return 0;
6565 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6567 struct igb_adapter *adapter = netdev_priv(netdev);
6568 struct e1000_hw *hw = &adapter->hw;
6569 int pf_id = adapter->vfs_allocated_count;
6570 s32 err;
6572 /* remove vlan from VLVF table array */
6573 err = igb_vlvf_set(adapter, vid, false, pf_id);
6575 /* if vid was not present in VLVF just remove it from table */
6576 if (err)
6577 igb_vfta_set(hw, vid, false);
6579 clear_bit(vid, adapter->active_vlans);
6581 return 0;
6584 static void igb_restore_vlan(struct igb_adapter *adapter)
6586 u16 vid;
6588 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6590 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6591 igb_vlan_rx_add_vid(adapter->netdev, vid);
6594 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6596 struct pci_dev *pdev = adapter->pdev;
6597 struct e1000_mac_info *mac = &adapter->hw.mac;
6599 mac->autoneg = 0;
6601 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6602 * for the switch() below to work */
6603 if ((spd & 1) || (dplx & ~1))
6604 goto err_inval;
6606 /* Fiber NICs only allow 1000 Mbps full duplex */
6607 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6608 (spd != SPEED_1000 ||
6609 dplx != DUPLEX_FULL))
6610 goto err_inval;
6612 switch (spd + dplx) {
6613 case SPEED_10 + DUPLEX_HALF:
6614 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6615 break;
6616 case SPEED_10 + DUPLEX_FULL:
6617 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6618 break;
6619 case SPEED_100 + DUPLEX_HALF:
6620 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6621 break;
6622 case SPEED_100 + DUPLEX_FULL:
6623 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6624 break;
6625 case SPEED_1000 + DUPLEX_FULL:
6626 mac->autoneg = 1;
6627 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6628 break;
6629 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6630 default:
6631 goto err_inval;
6633 return 0;
6635 err_inval:
6636 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6637 return -EINVAL;
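/* A short worked example of the spd + dplx encoding used above.  With the
 * ethtool constants SPEED_10/100/1000 = 10/100/1000 and DUPLEX_HALF/FULL =
 * 0/1, the sum is unambiguous precisely because speed always has a zero
 * least-significant bit and duplex occupies only that bit, which is what the
 * sanity check at the top of the function enforces:
 *
 *	SPEED_100  + DUPLEX_FULL = 101   ->  ADVERTISE_100_FULL
 *	SPEED_1000 + DUPLEX_FULL = 1001  ->  autoneg restricted to 1000FD
 */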
6640 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6641 bool runtime)
6643 struct net_device *netdev = pci_get_drvdata(pdev);
6644 struct igb_adapter *adapter = netdev_priv(netdev);
6645 struct e1000_hw *hw = &adapter->hw;
6646 u32 ctrl, rctl, status;
6647 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6648 #ifdef CONFIG_PM
6649 int retval = 0;
6650 #endif
6652 netif_device_detach(netdev);
6654 if (netif_running(netdev))
6655 __igb_close(netdev, true);
6657 igb_clear_interrupt_scheme(adapter);
6659 #ifdef CONFIG_PM
6660 retval = pci_save_state(pdev);
6661 if (retval)
6662 return retval;
6663 #endif
6665 status = rd32(E1000_STATUS);
6666 if (status & E1000_STATUS_LU)
6667 wufc &= ~E1000_WUFC_LNKC;
6669 if (wufc) {
6670 igb_setup_rctl(adapter);
6671 igb_set_rx_mode(netdev);
6673 /* turn on all-multi mode if wake on multicast is enabled */
6674 if (wufc & E1000_WUFC_MC) {
6675 rctl = rd32(E1000_RCTL);
6676 rctl |= E1000_RCTL_MPE;
6677 wr32(E1000_RCTL, rctl);
6680 ctrl = rd32(E1000_CTRL);
6681 /* advertise wake from D3Cold */
6682 #define E1000_CTRL_ADVD3WUC 0x00100000
6683 /* phy power management enable */
6684 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6685 ctrl |= E1000_CTRL_ADVD3WUC;
6686 wr32(E1000_CTRL, ctrl);
6688 /* Allow time for pending master requests to run */
6689 igb_disable_pcie_master(hw);
6691 wr32(E1000_WUC, E1000_WUC_PME_EN);
6692 wr32(E1000_WUFC, wufc);
6693 } else {
6694 wr32(E1000_WUC, 0);
6695 wr32(E1000_WUFC, 0);
6698 *enable_wake = wufc || adapter->en_mng_pt;
6699 if (!*enable_wake)
6700 igb_power_down_link(adapter);
6701 else
6702 igb_power_up_link(adapter);
6704 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6705 * would have already happened in close and is redundant. */
6706 igb_release_hw_control(adapter);
6708 pci_disable_device(pdev);
6710 return 0;
6713 #ifdef CONFIG_PM
6714 #ifdef CONFIG_PM_SLEEP
6715 static int igb_suspend(struct device *dev)
6717 int retval;
6718 bool wake;
6719 struct pci_dev *pdev = to_pci_dev(dev);
6721 retval = __igb_shutdown(pdev, &wake, 0);
6722 if (retval)
6723 return retval;
6725 if (wake) {
6726 pci_prepare_to_sleep(pdev);
6727 } else {
6728 pci_wake_from_d3(pdev, false);
6729 pci_set_power_state(pdev, PCI_D3hot);
6732 return 0;
6734 #endif /* CONFIG_PM_SLEEP */
6736 static int igb_resume(struct device *dev)
6738 struct pci_dev *pdev = to_pci_dev(dev);
6739 struct net_device *netdev = pci_get_drvdata(pdev);
6740 struct igb_adapter *adapter = netdev_priv(netdev);
6741 struct e1000_hw *hw = &adapter->hw;
6742 u32 err;
6744 pci_set_power_state(pdev, PCI_D0);
6745 pci_restore_state(pdev);
6746 pci_save_state(pdev);
6748 err = pci_enable_device_mem(pdev);
6749 if (err) {
6750 dev_err(&pdev->dev,
6751 "igb: Cannot enable PCI device from suspend\n");
6752 return err;
6754 pci_set_master(pdev);
6756 pci_enable_wake(pdev, PCI_D3hot, 0);
6757 pci_enable_wake(pdev, PCI_D3cold, 0);
6759 if (!rtnl_is_locked()) {
6761 * shut up ASSERT_RTNL() warning in
6762 * netif_set_real_num_tx/rx_queues.
6764 rtnl_lock();
6765 err = igb_init_interrupt_scheme(adapter);
6766 rtnl_unlock();
6767 } else {
6768 err = igb_init_interrupt_scheme(adapter);
6770 if (err) {
6771 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6772 return -ENOMEM;
6775 igb_reset(adapter);
6777 /* let the f/w know that the h/w is now under the control of the
6778 * driver. */
6779 igb_get_hw_control(adapter);
6781 wr32(E1000_WUS, ~0);
6783 if (netdev->flags & IFF_UP) {
6784 err = __igb_open(netdev, true);
6785 if (err)
6786 return err;
6789 netif_device_attach(netdev);
6790 return 0;
6793 #ifdef CONFIG_PM_RUNTIME
6794 static int igb_runtime_idle(struct device *dev)
6796 struct pci_dev *pdev = to_pci_dev(dev);
6797 struct net_device *netdev = pci_get_drvdata(pdev);
6798 struct igb_adapter *adapter = netdev_priv(netdev);
6800 if (!igb_has_link(adapter))
6801 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6803 return -EBUSY;
6806 static int igb_runtime_suspend(struct device *dev)
6808 struct pci_dev *pdev = to_pci_dev(dev);
6809 int retval;
6810 bool wake;
6812 retval = __igb_shutdown(pdev, &wake, 1);
6813 if (retval)
6814 return retval;
6816 if (wake) {
6817 pci_prepare_to_sleep(pdev);
6818 } else {
6819 pci_wake_from_d3(pdev, false);
6820 pci_set_power_state(pdev, PCI_D3hot);
6823 return 0;
6826 static int igb_runtime_resume(struct device *dev)
6828 return igb_resume(dev);
6830 #endif /* CONFIG_PM_RUNTIME */
6831 #endif
6833 static void igb_shutdown(struct pci_dev *pdev)
6835 bool wake;
6837 __igb_shutdown(pdev, &wake, 0);
6839 if (system_state == SYSTEM_POWER_OFF) {
6840 pci_wake_from_d3(pdev, wake);
6841 pci_set_power_state(pdev, PCI_D3hot);
6845 #ifdef CONFIG_NET_POLL_CONTROLLER
6847 * Polling 'interrupt' - used by things like netconsole to send skbs
6848 * without having to re-enable interrupts. It's not called while
6849 * the interrupt routine is executing.
6851 static void igb_netpoll(struct net_device *netdev)
6853 struct igb_adapter *adapter = netdev_priv(netdev);
6854 struct e1000_hw *hw = &adapter->hw;
6855 struct igb_q_vector *q_vector;
6856 int i;
6858 for (i = 0; i < adapter->num_q_vectors; i++) {
6859 q_vector = adapter->q_vector[i];
6860 if (adapter->msix_entries)
6861 wr32(E1000_EIMC, q_vector->eims_value);
6862 else
6863 igb_irq_disable(adapter);
6864 napi_schedule(&q_vector->napi);
6867 #endif /* CONFIG_NET_POLL_CONTROLLER */
6870 * igb_io_error_detected - called when PCI error is detected
6871 * @pdev: Pointer to PCI device
6872 * @state: The current pci connection state
6874 * This function is called after a PCI bus error affecting
6875 * this device has been detected.
6877 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6878 pci_channel_state_t state)
6880 struct net_device *netdev = pci_get_drvdata(pdev);
6881 struct igb_adapter *adapter = netdev_priv(netdev);
6883 netif_device_detach(netdev);
6885 if (state == pci_channel_io_perm_failure)
6886 return PCI_ERS_RESULT_DISCONNECT;
6888 if (netif_running(netdev))
6889 igb_down(adapter);
6890 pci_disable_device(pdev);
6892 /* Request a slot reset. */
6893 return PCI_ERS_RESULT_NEED_RESET;
6897 * igb_io_slot_reset - called after the pci bus has been reset.
6898 * @pdev: Pointer to PCI device
6900 * Restart the card from scratch, as if from a cold-boot. Implementation
6901 * resembles the first-half of the igb_resume routine.
6903 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6905 struct net_device *netdev = pci_get_drvdata(pdev);
6906 struct igb_adapter *adapter = netdev_priv(netdev);
6907 struct e1000_hw *hw = &adapter->hw;
6908 pci_ers_result_t result;
6909 int err;
6911 if (pci_enable_device_mem(pdev)) {
6912 dev_err(&pdev->dev,
6913 "Cannot re-enable PCI device after reset.\n");
6914 result = PCI_ERS_RESULT_DISCONNECT;
6915 } else {
6916 pci_set_master(pdev);
6917 pci_restore_state(pdev);
6918 pci_save_state(pdev);
6920 pci_enable_wake(pdev, PCI_D3hot, 0);
6921 pci_enable_wake(pdev, PCI_D3cold, 0);
6923 igb_reset(adapter);
6924 wr32(E1000_WUS, ~0);
6925 result = PCI_ERS_RESULT_RECOVERED;
6928 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6929 if (err) {
6930 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6931 "failed 0x%0x\n", err);
6932 /* non-fatal, continue */
6935 return result;
6939 * igb_io_resume - called when traffic can start flowing again.
6940 * @pdev: Pointer to PCI device
6942 * This callback is called when the error recovery driver tells us that
6943 * its OK to resume normal operation. Implementation resembles the
6944 * second-half of the igb_resume routine.
6946 static void igb_io_resume(struct pci_dev *pdev)
6948 struct net_device *netdev = pci_get_drvdata(pdev);
6949 struct igb_adapter *adapter = netdev_priv(netdev);
6951 if (netif_running(netdev)) {
6952 if (igb_up(adapter)) {
6953 dev_err(&pdev->dev, "igb_up failed after reset\n");
6954 return;
6958 netif_device_attach(netdev);
6960 /* let the f/w know that the h/w is now under the control of the
6961 * driver. */
6962 igb_get_hw_control(adapter);
6965 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6966 u8 qsel)
6968 u32 rar_low, rar_high;
6969 struct e1000_hw *hw = &adapter->hw;
6971 /* HW expects these in little endian so we reverse the byte order
6972 * from network order (big endian) to little endian
6974 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6975 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6976 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6978 /* Indicate to hardware the Address is Valid. */
6979 rar_high |= E1000_RAH_AV;
6981 if (hw->mac.type == e1000_82575)
6982 rar_high |= E1000_RAH_POOL_1 * qsel;
6983 else
6984 rar_high |= E1000_RAH_POOL_1 << qsel;
6986 wr32(E1000_RAL(index), rar_low);
6987 wrfl();
6988 wr32(E1000_RAH(index), rar_high);
6989 wrfl();
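/* Illustrative expansion (not additional driver code) of the byte packing
 * above, using the hypothetical address 00:1b:21:aa:bb:cc:
 *
 *	rar_low  = addr[3]<<24 | addr[2]<<16 | addr[1]<<8 | addr[0] = 0xaa211b00
 *	rar_high = E1000_RAH_AV | pool bits | addr[5]<<8 | addr[4]  = ... 0xccbb
 *
 * i.e. addr[0], the first octet on the wire, lands in the least significant
 * byte of RAL, matching the little-endian layout the hardware expects.
 */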
6992 static int igb_set_vf_mac(struct igb_adapter *adapter,
6993 int vf, unsigned char *mac_addr)
6995 struct e1000_hw *hw = &adapter->hw;
6996 /* VF MAC addresses start at the end of the receive address table and move
6997 * towards the first entry; as a result a collision should not be possible */
6998 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7000 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7002 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7004 return 0;
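/* A small numeric sketch of the RAR indexing above, assuming a hypothetical
 * rar_entry_count of 24: VF 0 takes entry 23, VF 1 entry 22, and so on,
 * growing downward while the PF's own filters occupy the low entries starting
 * at 0, so the two sets do not collide for sane VF counts.
 */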
7007 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7009 struct igb_adapter *adapter = netdev_priv(netdev);
7010 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7011 return -EINVAL;
7012 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7013 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7014 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7015 " change effective.");
7016 if (test_bit(__IGB_DOWN, &adapter->state)) {
7017 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7018 " but the PF device is not up.\n");
7019 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7020 " attempting to use the VF device.\n");
7022 return igb_set_vf_mac(adapter, vf, mac);
7025 static int igb_link_mbps(int internal_link_speed)
7027 switch (internal_link_speed) {
7028 case SPEED_100:
7029 return 100;
7030 case SPEED_1000:
7031 return 1000;
7032 default:
7033 return 0;
7037 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7038 int link_speed)
7040 int rf_dec, rf_int;
7041 u32 bcnrc_val;
7043 if (tx_rate != 0) {
7044 /* Calculate the rate factor values to set */
7045 rf_int = link_speed / tx_rate;
7046 rf_dec = (link_speed - (rf_int * tx_rate));
7047 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7049 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7050 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7051 E1000_RTTBCNRC_RF_INT_MASK);
7052 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7053 } else {
7054 bcnrc_val = 0;
7057 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7058 wr32(E1000_RTTBCNRC, bcnrc_val);
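/* A worked example of the rate-factor math above, assuming
 * E1000_RTTBCNRC_RF_INT_SHIFT is 14 (a 14-bit fractional part).  For a
 * 1000 Mbps link and a 300 Mbps VF cap:
 *
 *	rf_int = 1000 / 300                         = 3
 *	rf_dec = (1000 - 3 * 300) * (1 << 14) / 300 = 5461
 *
 * so the register holds roughly 3 + 5461/16384 ~= 3.33 = link_speed/tx_rate,
 * and the hardware paces the VF's queue at 1/3.33 of the link rate.
 */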
7061 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7063 int actual_link_speed, i;
7064 bool reset_rate = false;
7066 /* VF TX rate limit was not set or not supported */
7067 if ((adapter->vf_rate_link_speed == 0) ||
7068 (adapter->hw.mac.type != e1000_82576))
7069 return;
7071 actual_link_speed = igb_link_mbps(adapter->link_speed);
7072 if (actual_link_speed != adapter->vf_rate_link_speed) {
7073 reset_rate = true;
7074 adapter->vf_rate_link_speed = 0;
7075 dev_info(&adapter->pdev->dev,
7076 "Link speed has been changed. VF Transmit "
7077 "rate is disabled\n");
7080 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7081 if (reset_rate)
7082 adapter->vf_data[i].tx_rate = 0;
7084 igb_set_vf_rate_limit(&adapter->hw, i,
7085 adapter->vf_data[i].tx_rate,
7086 actual_link_speed);
7090 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7092 struct igb_adapter *adapter = netdev_priv(netdev);
7093 struct e1000_hw *hw = &adapter->hw;
7094 int actual_link_speed;
7096 if (hw->mac.type != e1000_82576)
7097 return -EOPNOTSUPP;
7099 actual_link_speed = igb_link_mbps(adapter->link_speed);
7100 if ((vf >= adapter->vfs_allocated_count) ||
7101 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7102 (tx_rate < 0) || (tx_rate > actual_link_speed))
7103 return -EINVAL;
7105 adapter->vf_rate_link_speed = actual_link_speed;
7106 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7107 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7109 return 0;
7112 static int igb_ndo_get_vf_config(struct net_device *netdev,
7113 int vf, struct ifla_vf_info *ivi)
7115 struct igb_adapter *adapter = netdev_priv(netdev);
7116 if (vf >= adapter->vfs_allocated_count)
7117 return -EINVAL;
7118 ivi->vf = vf;
7119 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7120 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7121 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7122 ivi->qos = adapter->vf_data[vf].pf_qos;
7123 return 0;
7126 static void igb_vmm_control(struct igb_adapter *adapter)
7128 struct e1000_hw *hw = &adapter->hw;
7129 u32 reg;
7131 switch (hw->mac.type) {
7132 case e1000_82575:
7133 default:
7134 /* replication is not supported for 82575 */
7135 return;
7136 case e1000_82576:
7137 /* notify HW that the MAC is adding vlan tags */
7138 reg = rd32(E1000_DTXCTL);
7139 reg |= E1000_DTXCTL_VLAN_ADDED;
7140 wr32(E1000_DTXCTL, reg);
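/* fall through */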
7141 case e1000_82580:
7142 /* enable replication vlan tag stripping */
7143 reg = rd32(E1000_RPLOLR);
7144 reg |= E1000_RPLOLR_STRVLAN;
7145 wr32(E1000_RPLOLR, reg);
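/* fall through */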
7146 case e1000_i350:
7147 /* none of the above registers are supported by i350 */
7148 break;
7151 if (adapter->vfs_allocated_count) {
7152 igb_vmdq_set_loopback_pf(hw, true);
7153 igb_vmdq_set_replication_pf(hw, true);
7154 igb_vmdq_set_anti_spoofing_pf(hw, true,
7155 adapter->vfs_allocated_count);
7156 } else {
7157 igb_vmdq_set_loopback_pf(hw, false);
7158 igb_vmdq_set_replication_pf(hw, false);
7162 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7164 struct e1000_hw *hw = &adapter->hw;
7165 u32 dmac_thr;
7166 u16 hwm;
7168 if (hw->mac.type > e1000_82580) {
7169 if (adapter->flags & IGB_FLAG_DMAC) {
7170 u32 reg;
7172 /* force threshold to 0. */
7173 wr32(E1000_DMCTXTH, 0);
7176 * DMA Coalescing high water mark needs to be greater
7177 * than the Rx threshold. Set hwm to PBA - max frame
7178 * size in 16B units, but never below PBA - 6KB.
7180 hwm = 64 * pba - adapter->max_frame_size / 16;
7181 if (hwm < 64 * (pba - 6))
7182 hwm = 64 * (pba - 6);
7183 reg = rd32(E1000_FCRTC);
7184 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7185 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7186 & E1000_FCRTC_RTH_COAL_MASK);
7187 wr32(E1000_FCRTC, reg);
7190 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7191 * frame size, but never below PBA - 10KB.
7193 dmac_thr = pba - adapter->max_frame_size / 512;
7194 if (dmac_thr < pba - 10)
7195 dmac_thr = pba - 10;
7196 reg = rd32(E1000_DMACR);
7197 reg &= ~E1000_DMACR_DMACTHR_MASK;
7198 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7199 & E1000_DMACR_DMACTHR_MASK);
7201 /* transition to L0s or L1 if available */
7202 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7204 /* watchdog timer = +/-1000 usec in 32 usec intervals */
7205 reg |= (1000 >> 5);
7206 wr32(E1000_DMACR, reg);
7209 * no lower threshold to disable
7210 * coalescing (smart FIFO) - UTRESH=0
7212 wr32(E1000_DMCRTRH, 0);
7214 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7216 wr32(E1000_DMCTLX, reg);
7219 * free space in tx packet buffer to wake from
7220 * DMA coal
7222 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7223 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7226 * make low power state decision controlled
7227 * by DMA coal
7229 reg = rd32(E1000_PCIEMISC);
7230 reg &= ~E1000_PCIEMISC_LX_DECISION;
7231 wr32(E1000_PCIEMISC, reg);
7232 } /* endif adapter->dmac is not disabled */
7233 } else if (hw->mac.type == e1000_82580) {
7234 u32 reg = rd32(E1000_PCIEMISC);
7235 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7236 wr32(E1000_DMACR, 0);
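/* A worked example of the DMA-coalescing thresholds computed in
 * igb_init_dmac() above, assuming a hypothetical pba of 34 (KB) and a
 * 1522-byte maximum frame:
 *
 *	hwm              = 64 * 34 - 1522 / 16 = 2176 - 95 = 2081   (16-byte units)
 *	lower bound      = 64 * (34 - 6)       = 1792               (PBA - 6KB)
 *	dmac_thr         = 34 - 1522 / 512     = 34 - 2    = 32     (KB)
 *	lower bound      = 34 - 10             = 24                 (PBA - 10KB)
 *
 * Neither value is raised here since both sit above their lower bounds, and
 * the high water mark stays above the Rx threshold as required.
 */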
7240 /* igb_main.c */