igb: make local functions static
[linux-2.6/libata-dev.git] / drivers / net / ethernet / intel / igb / igb_main.c
blob92fd642b2dfe29a4b085f352756154f84b6012fb
1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 /* required last entry */
104 {0, }
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
175 #ifdef CONFIG_PM
176 static int igb_suspend(struct device *);
177 static int igb_resume(struct device *);
178 #ifdef CONFIG_PM_RUNTIME
179 static int igb_runtime_suspend(struct device *dev);
180 static int igb_runtime_resume(struct device *dev);
181 static int igb_runtime_idle(struct device *dev);
182 #endif
183 static const struct dev_pm_ops igb_pm_ops = {
184 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
185 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
186 igb_runtime_idle)
188 #endif
189 static void igb_shutdown(struct pci_dev *);
190 #ifdef CONFIG_IGB_DCA
191 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
192 static struct notifier_block dca_notifier = {
193 .notifier_call = igb_notify_dca,
194 .next = NULL,
195 .priority = 0
197 #endif
198 #ifdef CONFIG_NET_POLL_CONTROLLER
199 /* for netdump / net console */
200 static void igb_netpoll(struct net_device *);
201 #endif
202 #ifdef CONFIG_PCI_IOV
203 static unsigned int max_vfs = 0;
204 module_param(max_vfs, uint, 0);
205 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
206 "per physical function");
207 #endif /* CONFIG_PCI_IOV */
209 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
210 pci_channel_state_t);
211 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
212 static void igb_io_resume(struct pci_dev *);
214 static struct pci_error_handlers igb_err_handler = {
215 .error_detected = igb_io_error_detected,
216 .slot_reset = igb_io_slot_reset,
217 .resume = igb_io_resume,
220 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
222 static struct pci_driver igb_driver = {
223 .name = igb_driver_name,
224 .id_table = igb_pci_tbl,
225 .probe = igb_probe,
226 .remove = __devexit_p(igb_remove),
227 #ifdef CONFIG_PM
228 .driver.pm = &igb_pm_ops,
229 #endif
230 .shutdown = igb_shutdown,
231 .err_handler = &igb_err_handler
234 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
235 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
236 MODULE_LICENSE("GPL");
237 MODULE_VERSION(DRV_VERSION);
239 struct igb_reg_info {
240 u32 ofs;
241 char *name;
244 static const struct igb_reg_info igb_reg_info_tbl[] = {
246 /* General Registers */
247 {E1000_CTRL, "CTRL"},
248 {E1000_STATUS, "STATUS"},
249 {E1000_CTRL_EXT, "CTRL_EXT"},
251 /* Interrupt Registers */
252 {E1000_ICR, "ICR"},
254 /* RX Registers */
255 {E1000_RCTL, "RCTL"},
256 {E1000_RDLEN(0), "RDLEN"},
257 {E1000_RDH(0), "RDH"},
258 {E1000_RDT(0), "RDT"},
259 {E1000_RXDCTL(0), "RXDCTL"},
260 {E1000_RDBAL(0), "RDBAL"},
261 {E1000_RDBAH(0), "RDBAH"},
263 /* TX Registers */
264 {E1000_TCTL, "TCTL"},
265 {E1000_TDBAL(0), "TDBAL"},
266 {E1000_TDBAH(0), "TDBAH"},
267 {E1000_TDLEN(0), "TDLEN"},
268 {E1000_TDH(0), "TDH"},
269 {E1000_TDT(0), "TDT"},
270 {E1000_TXDCTL(0), "TXDCTL"},
271 {E1000_TDFH, "TDFH"},
272 {E1000_TDFT, "TDFT"},
273 {E1000_TDFHS, "TDFHS"},
274 {E1000_TDFPC, "TDFPC"},
276 /* List Terminator */
281 * igb_regdump - register printout routine
283 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
285 int n = 0;
286 char rname[16];
287 u32 regs[8];
289 switch (reginfo->ofs) {
290 case E1000_RDLEN(0):
291 for (n = 0; n < 4; n++)
292 regs[n] = rd32(E1000_RDLEN(n));
293 break;
294 case E1000_RDH(0):
295 for (n = 0; n < 4; n++)
296 regs[n] = rd32(E1000_RDH(n));
297 break;
298 case E1000_RDT(0):
299 for (n = 0; n < 4; n++)
300 regs[n] = rd32(E1000_RDT(n));
301 break;
302 case E1000_RXDCTL(0):
303 for (n = 0; n < 4; n++)
304 regs[n] = rd32(E1000_RXDCTL(n));
305 break;
306 case E1000_RDBAL(0):
307 for (n = 0; n < 4; n++)
308 regs[n] = rd32(E1000_RDBAL(n));
309 break;
310 case E1000_RDBAH(0):
311 for (n = 0; n < 4; n++)
312 regs[n] = rd32(E1000_RDBAH(n));
313 break;
314 case E1000_TDBAL(0):
315 for (n = 0; n < 4; n++)
316 regs[n] = rd32(E1000_RDBAL(n));
317 break;
318 case E1000_TDBAH(0):
319 for (n = 0; n < 4; n++)
320 regs[n] = rd32(E1000_TDBAH(n));
321 break;
322 case E1000_TDLEN(0):
323 for (n = 0; n < 4; n++)
324 regs[n] = rd32(E1000_TDLEN(n));
325 break;
326 case E1000_TDH(0):
327 for (n = 0; n < 4; n++)
328 regs[n] = rd32(E1000_TDH(n));
329 break;
330 case E1000_TDT(0):
331 for (n = 0; n < 4; n++)
332 regs[n] = rd32(E1000_TDT(n));
333 break;
334 case E1000_TXDCTL(0):
335 for (n = 0; n < 4; n++)
336 regs[n] = rd32(E1000_TXDCTL(n));
337 break;
338 default:
339 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
340 return;
343 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
344 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
345 regs[2], regs[3]);
349 * igb_dump - Print registers, tx-rings and rx-rings
351 static void igb_dump(struct igb_adapter *adapter)
353 struct net_device *netdev = adapter->netdev;
354 struct e1000_hw *hw = &adapter->hw;
355 struct igb_reg_info *reginfo;
356 struct igb_ring *tx_ring;
357 union e1000_adv_tx_desc *tx_desc;
358 struct my_u0 { u64 a; u64 b; } *u0;
359 struct igb_ring *rx_ring;
360 union e1000_adv_rx_desc *rx_desc;
361 u32 staterr;
362 u16 i, n;
364 if (!netif_msg_hw(adapter))
365 return;
367 /* Print netdevice Info */
368 if (netdev) {
369 dev_info(&adapter->pdev->dev, "Net device Info\n");
370 pr_info("Device Name state trans_start "
371 "last_rx\n");
372 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
373 netdev->state, netdev->trans_start, netdev->last_rx);
376 /* Print Registers */
377 dev_info(&adapter->pdev->dev, "Register Dump\n");
378 pr_info(" Register Name Value\n");
379 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
380 reginfo->name; reginfo++) {
381 igb_regdump(hw, reginfo);
384 /* Print TX Ring Summary */
385 if (!netdev || !netif_running(netdev))
386 goto exit;
388 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
389 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
390 for (n = 0; n < adapter->num_tx_queues; n++) {
391 struct igb_tx_buffer *buffer_info;
392 tx_ring = adapter->tx_ring[n];
393 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
394 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
395 n, tx_ring->next_to_use, tx_ring->next_to_clean,
396 (u64)buffer_info->dma,
397 buffer_info->length,
398 buffer_info->next_to_watch,
399 (u64)buffer_info->time_stamp);
402 /* Print TX Rings */
403 if (!netif_msg_tx_done(adapter))
404 goto rx_ring_summary;
406 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
408 /* Transmit Descriptor Formats
410 * Advanced Transmit Descriptor
411 * +--------------------------------------------------------------+
412 * 0 | Buffer Address [63:0] |
413 * +--------------------------------------------------------------+
414 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
415 * +--------------------------------------------------------------+
416 * 63 46 45 40 39 38 36 35 32 31 24 15 0
419 for (n = 0; n < adapter->num_tx_queues; n++) {
420 tx_ring = adapter->tx_ring[n];
421 pr_info("------------------------------------\n");
422 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
423 pr_info("------------------------------------\n");
424 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
425 "[bi->dma ] leng ntw timestamp "
426 "bi->skb\n");
428 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
429 const char *next_desc;
430 struct igb_tx_buffer *buffer_info;
431 tx_desc = IGB_TX_DESC(tx_ring, i);
432 buffer_info = &tx_ring->tx_buffer_info[i];
433 u0 = (struct my_u0 *)tx_desc;
434 if (i == tx_ring->next_to_use &&
435 i == tx_ring->next_to_clean)
436 next_desc = " NTC/U";
437 else if (i == tx_ring->next_to_use)
438 next_desc = " NTU";
439 else if (i == tx_ring->next_to_clean)
440 next_desc = " NTC";
441 else
442 next_desc = "";
444 pr_info("T [0x%03X] %016llX %016llX %016llX"
445 " %04X %p %016llX %p%s\n", i,
446 le64_to_cpu(u0->a),
447 le64_to_cpu(u0->b),
448 (u64)buffer_info->dma,
449 buffer_info->length,
450 buffer_info->next_to_watch,
451 (u64)buffer_info->time_stamp,
452 buffer_info->skb, next_desc);
454 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
455 print_hex_dump(KERN_INFO, "",
456 DUMP_PREFIX_ADDRESS,
457 16, 1, phys_to_virt(buffer_info->dma),
458 buffer_info->length, true);
462 /* Print RX Rings Summary */
463 rx_ring_summary:
464 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
465 pr_info("Queue [NTU] [NTC]\n");
466 for (n = 0; n < adapter->num_rx_queues; n++) {
467 rx_ring = adapter->rx_ring[n];
468 pr_info(" %5d %5X %5X\n",
469 n, rx_ring->next_to_use, rx_ring->next_to_clean);
472 /* Print RX Rings */
473 if (!netif_msg_rx_status(adapter))
474 goto exit;
476 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
478 /* Advanced Receive Descriptor (Read) Format
479 * 63 1 0
480 * +-----------------------------------------------------+
481 * 0 | Packet Buffer Address [63:1] |A0/NSE|
482 * +----------------------------------------------+------+
483 * 8 | Header Buffer Address [63:1] | DD |
484 * +-----------------------------------------------------+
487 * Advanced Receive Descriptor (Write-Back) Format
489 * 63 48 47 32 31 30 21 20 17 16 4 3 0
490 * +------------------------------------------------------+
491 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
492 * | Checksum Ident | | | | Type | Type |
493 * +------------------------------------------------------+
494 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
495 * +------------------------------------------------------+
496 * 63 48 47 32 31 20 19 0
499 for (n = 0; n < adapter->num_rx_queues; n++) {
500 rx_ring = adapter->rx_ring[n];
501 pr_info("------------------------------------\n");
502 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
503 pr_info("------------------------------------\n");
504 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
505 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
506 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
507 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
509 for (i = 0; i < rx_ring->count; i++) {
510 const char *next_desc;
511 struct igb_rx_buffer *buffer_info;
512 buffer_info = &rx_ring->rx_buffer_info[i];
513 rx_desc = IGB_RX_DESC(rx_ring, i);
514 u0 = (struct my_u0 *)rx_desc;
515 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
517 if (i == rx_ring->next_to_use)
518 next_desc = " NTU";
519 else if (i == rx_ring->next_to_clean)
520 next_desc = " NTC";
521 else
522 next_desc = "";
524 if (staterr & E1000_RXD_STAT_DD) {
525 /* Descriptor Done */
526 pr_info("%s[0x%03X] %016llX %016llX -------"
527 "--------- %p%s\n", "RWB", i,
528 le64_to_cpu(u0->a),
529 le64_to_cpu(u0->b),
530 buffer_info->skb, next_desc);
531 } else {
532 pr_info("%s[0x%03X] %016llX %016llX %016llX"
533 " %p%s\n", "R ", i,
534 le64_to_cpu(u0->a),
535 le64_to_cpu(u0->b),
536 (u64)buffer_info->dma,
537 buffer_info->skb, next_desc);
539 if (netif_msg_pktdata(adapter)) {
540 print_hex_dump(KERN_INFO, "",
541 DUMP_PREFIX_ADDRESS,
542 16, 1,
543 phys_to_virt(buffer_info->dma),
544 IGB_RX_HDR_LEN, true);
545 print_hex_dump(KERN_INFO, "",
546 DUMP_PREFIX_ADDRESS,
547 16, 1,
548 phys_to_virt(
549 buffer_info->page_dma +
550 buffer_info->page_offset),
551 PAGE_SIZE/2, true);
557 exit:
558 return;
563 * igb_read_clock - read raw cycle counter (to be used by time counter)
565 static cycle_t igb_read_clock(const struct cyclecounter *tc)
567 struct igb_adapter *adapter =
568 container_of(tc, struct igb_adapter, cycles);
569 struct e1000_hw *hw = &adapter->hw;
570 u64 stamp = 0;
571 int shift = 0;
574 * The timestamp latches on lowest register read. For the 82580
575 * the lowest register is SYSTIMR instead of SYSTIML. However we never
576 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
578 if (hw->mac.type >= e1000_82580) {
579 stamp = rd32(E1000_SYSTIMR) >> 8;
580 shift = IGB_82580_TSYNC_SHIFT;
583 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
584 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
585 return stamp;
589 * igb_get_hw_dev - return device
590 * used by hardware layer to print debugging information
592 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
594 struct igb_adapter *adapter = hw->back;
595 return adapter->netdev;
599 * igb_init_module - Driver Registration Routine
601 * igb_init_module is the first routine called when the driver is
602 * loaded. All it does is register with the PCI subsystem.
604 static int __init igb_init_module(void)
606 int ret;
607 pr_info("%s - version %s\n",
608 igb_driver_string, igb_driver_version);
610 pr_info("%s\n", igb_copyright);
612 #ifdef CONFIG_IGB_DCA
613 dca_register_notify(&dca_notifier);
614 #endif
615 ret = pci_register_driver(&igb_driver);
616 return ret;
619 module_init(igb_init_module);
622 * igb_exit_module - Driver Exit Cleanup Routine
624 * igb_exit_module is called just before the driver is removed
625 * from memory.
627 static void __exit igb_exit_module(void)
629 #ifdef CONFIG_IGB_DCA
630 dca_unregister_notify(&dca_notifier);
631 #endif
632 pci_unregister_driver(&igb_driver);
635 module_exit(igb_exit_module);
/*
 * Map a logical queue number to its 82576 register index: bit 0 of @i
 * selects an offset of 8, the remaining bits give the base index
 * (0->0, 1->8, 2->1, 3->9, ...).  The argument is parenthesised so that
 * expressions such as "a | b" expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
639 * igb_cache_ring_register - Descriptor ring to register mapping
640 * @adapter: board private structure to initialize
642 * Once we know the feature-set enabled for the device, we'll cache
643 * the register offset the descriptor ring is assigned to.
645 static void igb_cache_ring_register(struct igb_adapter *adapter)
647 int i = 0, j = 0;
648 u32 rbase_offset = adapter->vfs_allocated_count;
650 switch (adapter->hw.mac.type) {
651 case e1000_82576:
652 /* The queues are allocated for virtualization such that VF 0
653 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
654 * In order to avoid collision we start at the first free queue
655 * and continue consuming queues in the same sequence
657 if (adapter->vfs_allocated_count) {
658 for (; i < adapter->rss_queues; i++)
659 adapter->rx_ring[i]->reg_idx = rbase_offset +
660 Q_IDX_82576(i);
662 case e1000_82575:
663 case e1000_82580:
664 case e1000_i350:
665 default:
666 for (; i < adapter->num_rx_queues; i++)
667 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
668 for (; j < adapter->num_tx_queues; j++)
669 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
670 break;
674 static void igb_free_queues(struct igb_adapter *adapter)
676 int i;
678 for (i = 0; i < adapter->num_tx_queues; i++) {
679 kfree(adapter->tx_ring[i]);
680 adapter->tx_ring[i] = NULL;
682 for (i = 0; i < adapter->num_rx_queues; i++) {
683 kfree(adapter->rx_ring[i]);
684 adapter->rx_ring[i] = NULL;
686 adapter->num_rx_queues = 0;
687 adapter->num_tx_queues = 0;
691 * igb_alloc_queues - Allocate memory for all rings
692 * @adapter: board private structure to initialize
694 * We allocate one ring per queue at run-time since we don't know the
695 * number of queues at compile-time.
697 static int igb_alloc_queues(struct igb_adapter *adapter)
699 struct igb_ring *ring;
700 int i;
701 int orig_node = adapter->node;
703 for (i = 0; i < adapter->num_tx_queues; i++) {
704 if (orig_node == -1) {
705 int cur_node = next_online_node(adapter->node);
706 if (cur_node == MAX_NUMNODES)
707 cur_node = first_online_node;
708 adapter->node = cur_node;
710 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
711 adapter->node);
712 if (!ring)
713 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
714 if (!ring)
715 goto err;
716 ring->count = adapter->tx_ring_count;
717 ring->queue_index = i;
718 ring->dev = &adapter->pdev->dev;
719 ring->netdev = adapter->netdev;
720 ring->numa_node = adapter->node;
721 /* For 82575, context index must be unique per ring. */
722 if (adapter->hw.mac.type == e1000_82575)
723 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
724 adapter->tx_ring[i] = ring;
726 /* Restore the adapter's original node */
727 adapter->node = orig_node;
729 for (i = 0; i < adapter->num_rx_queues; i++) {
730 if (orig_node == -1) {
731 int cur_node = next_online_node(adapter->node);
732 if (cur_node == MAX_NUMNODES)
733 cur_node = first_online_node;
734 adapter->node = cur_node;
736 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
737 adapter->node);
738 if (!ring)
739 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
740 if (!ring)
741 goto err;
742 ring->count = adapter->rx_ring_count;
743 ring->queue_index = i;
744 ring->dev = &adapter->pdev->dev;
745 ring->netdev = adapter->netdev;
746 ring->numa_node = adapter->node;
747 /* set flag indicating ring supports SCTP checksum offload */
748 if (adapter->hw.mac.type >= e1000_82576)
749 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
751 /* On i350, loopback VLAN packets have the tag byte-swapped. */
752 if (adapter->hw.mac.type == e1000_i350)
753 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
755 adapter->rx_ring[i] = ring;
757 /* Restore the adapter's original node */
758 adapter->node = orig_node;
760 igb_cache_ring_register(adapter);
762 return 0;
764 err:
765 /* Restore the adapter's original node */
766 adapter->node = orig_node;
767 igb_free_queues(adapter);
769 return -ENOMEM;
773 * igb_write_ivar - configure ivar for given MSI-X vector
774 * @hw: pointer to the HW structure
775 * @msix_vector: vector number we are allocating to a given ring
776 * @index: row index of IVAR register to write within IVAR table
777 * @offset: column offset of in IVAR, should be multiple of 8
779 * This function is intended to handle the writing of the IVAR register
780 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
781 * each containing an cause allocation for an Rx and Tx ring, and a
782 * variable number of rows depending on the number of queues supported.
784 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
785 int index, int offset)
787 u32 ivar = array_rd32(E1000_IVAR0, index);
789 /* clear any bits that are currently set */
790 ivar &= ~((u32)0xFF << offset);
792 /* write vector and valid bit */
793 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
795 array_wr32(E1000_IVAR0, index, ivar);
798 #define IGB_N0_QUEUE -1
799 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
801 struct igb_adapter *adapter = q_vector->adapter;
802 struct e1000_hw *hw = &adapter->hw;
803 int rx_queue = IGB_N0_QUEUE;
804 int tx_queue = IGB_N0_QUEUE;
805 u32 msixbm = 0;
807 if (q_vector->rx.ring)
808 rx_queue = q_vector->rx.ring->reg_idx;
809 if (q_vector->tx.ring)
810 tx_queue = q_vector->tx.ring->reg_idx;
812 switch (hw->mac.type) {
813 case e1000_82575:
814 /* The 82575 assigns vectors using a bitmask, which matches the
815 bitmask for the EICR/EIMS/EIMC registers. To assign one
816 or more queues to a vector, we write the appropriate bits
817 into the MSIXBM register for that vector. */
818 if (rx_queue > IGB_N0_QUEUE)
819 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
820 if (tx_queue > IGB_N0_QUEUE)
821 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
822 if (!adapter->msix_entries && msix_vector == 0)
823 msixbm |= E1000_EIMS_OTHER;
824 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
825 q_vector->eims_value = msixbm;
826 break;
827 case e1000_82576:
829 * 82576 uses a table that essentially consists of 2 columns
830 * with 8 rows. The ordering is column-major so we use the
831 * lower 3 bits as the row index, and the 4th bit as the
832 * column offset.
834 if (rx_queue > IGB_N0_QUEUE)
835 igb_write_ivar(hw, msix_vector,
836 rx_queue & 0x7,
837 (rx_queue & 0x8) << 1);
838 if (tx_queue > IGB_N0_QUEUE)
839 igb_write_ivar(hw, msix_vector,
840 tx_queue & 0x7,
841 ((tx_queue & 0x8) << 1) + 8);
842 q_vector->eims_value = 1 << msix_vector;
843 break;
844 case e1000_82580:
845 case e1000_i350:
847 * On 82580 and newer adapters the scheme is similar to 82576
848 * however instead of ordering column-major we have things
849 * ordered row-major. So we traverse the table by using
850 * bit 0 as the column offset, and the remaining bits as the
851 * row index.
853 if (rx_queue > IGB_N0_QUEUE)
854 igb_write_ivar(hw, msix_vector,
855 rx_queue >> 1,
856 (rx_queue & 0x1) << 4);
857 if (tx_queue > IGB_N0_QUEUE)
858 igb_write_ivar(hw, msix_vector,
859 tx_queue >> 1,
860 ((tx_queue & 0x1) << 4) + 8);
861 q_vector->eims_value = 1 << msix_vector;
862 break;
863 default:
864 BUG();
865 break;
868 /* add q_vector eims value to global eims_enable_mask */
869 adapter->eims_enable_mask |= q_vector->eims_value;
871 /* configure q_vector to set itr on first interrupt */
872 q_vector->set_itr = 1;
876 * igb_configure_msix - Configure MSI-X hardware
878 * igb_configure_msix sets up the hardware to properly
879 * generate MSI-X interrupts.
881 static void igb_configure_msix(struct igb_adapter *adapter)
883 u32 tmp;
884 int i, vector = 0;
885 struct e1000_hw *hw = &adapter->hw;
887 adapter->eims_enable_mask = 0;
889 /* set vector for other causes, i.e. link changes */
890 switch (hw->mac.type) {
891 case e1000_82575:
892 tmp = rd32(E1000_CTRL_EXT);
893 /* enable MSI-X PBA support*/
894 tmp |= E1000_CTRL_EXT_PBA_CLR;
896 /* Auto-Mask interrupts upon ICR read. */
897 tmp |= E1000_CTRL_EXT_EIAME;
898 tmp |= E1000_CTRL_EXT_IRCA;
900 wr32(E1000_CTRL_EXT, tmp);
902 /* enable msix_other interrupt */
903 array_wr32(E1000_MSIXBM(0), vector++,
904 E1000_EIMS_OTHER);
905 adapter->eims_other = E1000_EIMS_OTHER;
907 break;
909 case e1000_82576:
910 case e1000_82580:
911 case e1000_i350:
912 /* Turn on MSI-X capability first, or our settings
913 * won't stick. And it will take days to debug. */
914 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
915 E1000_GPIE_PBA | E1000_GPIE_EIAME |
916 E1000_GPIE_NSICR);
918 /* enable msix_other interrupt */
919 adapter->eims_other = 1 << vector;
920 tmp = (vector++ | E1000_IVAR_VALID) << 8;
922 wr32(E1000_IVAR_MISC, tmp);
923 break;
924 default:
925 /* do nothing, since nothing else supports MSI-X */
926 break;
927 } /* switch (hw->mac.type) */
929 adapter->eims_enable_mask |= adapter->eims_other;
931 for (i = 0; i < adapter->num_q_vectors; i++)
932 igb_assign_vector(adapter->q_vector[i], vector++);
934 wrfl();
938 * igb_request_msix - Initialize MSI-X interrupts
940 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
941 * kernel.
943 static int igb_request_msix(struct igb_adapter *adapter)
945 struct net_device *netdev = adapter->netdev;
946 struct e1000_hw *hw = &adapter->hw;
947 int i, err = 0, vector = 0;
949 err = request_irq(adapter->msix_entries[vector].vector,
950 igb_msix_other, 0, netdev->name, adapter);
951 if (err)
952 goto out;
953 vector++;
955 for (i = 0; i < adapter->num_q_vectors; i++) {
956 struct igb_q_vector *q_vector = adapter->q_vector[i];
958 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
960 if (q_vector->rx.ring && q_vector->tx.ring)
961 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
962 q_vector->rx.ring->queue_index);
963 else if (q_vector->tx.ring)
964 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
965 q_vector->tx.ring->queue_index);
966 else if (q_vector->rx.ring)
967 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
968 q_vector->rx.ring->queue_index);
969 else
970 sprintf(q_vector->name, "%s-unused", netdev->name);
972 err = request_irq(adapter->msix_entries[vector].vector,
973 igb_msix_ring, 0, q_vector->name,
974 q_vector);
975 if (err)
976 goto out;
977 vector++;
980 igb_configure_msix(adapter);
981 return 0;
982 out:
983 return err;
986 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
988 if (adapter->msix_entries) {
989 pci_disable_msix(adapter->pdev);
990 kfree(adapter->msix_entries);
991 adapter->msix_entries = NULL;
992 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
993 pci_disable_msi(adapter->pdev);
998 * igb_free_q_vectors - Free memory allocated for interrupt vectors
999 * @adapter: board private structure to initialize
1001 * This function frees the memory allocated to the q_vectors. In addition if
1002 * NAPI is enabled it will delete any references to the NAPI struct prior
1003 * to freeing the q_vector.
1005 static void igb_free_q_vectors(struct igb_adapter *adapter)
1007 int v_idx;
1009 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1010 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1011 adapter->q_vector[v_idx] = NULL;
1012 if (!q_vector)
1013 continue;
1014 netif_napi_del(&q_vector->napi);
1015 kfree(q_vector);
1017 adapter->num_q_vectors = 0;
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.  It releases, in order, the queues, the
 * q_vectors and finally the MSI-X/MSI capability.
 **/
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
1034 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1036 * Attempt to configure interrupts using the best available
1037 * capabilities of the hardware and kernel.
1039 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1041 int err;
1042 int numvecs, i;
1044 /* Number of supported queues. */
1045 adapter->num_rx_queues = adapter->rss_queues;
1046 if (adapter->vfs_allocated_count)
1047 adapter->num_tx_queues = 1;
1048 else
1049 adapter->num_tx_queues = adapter->rss_queues;
1051 /* start with one vector for every rx queue */
1052 numvecs = adapter->num_rx_queues;
1054 /* if tx handler is separate add 1 for every tx queue */
1055 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1056 numvecs += adapter->num_tx_queues;
1058 /* store the number of vectors reserved for queues */
1059 adapter->num_q_vectors = numvecs;
1061 /* add 1 vector for link status interrupts */
1062 numvecs++;
1063 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1064 GFP_KERNEL);
1065 if (!adapter->msix_entries)
1066 goto msi_only;
1068 for (i = 0; i < numvecs; i++)
1069 adapter->msix_entries[i].entry = i;
1071 err = pci_enable_msix(adapter->pdev,
1072 adapter->msix_entries,
1073 numvecs);
1074 if (err == 0)
1075 goto out;
1077 igb_reset_interrupt_capability(adapter);
1079 /* If we can't do MSI-X, try MSI */
1080 msi_only:
1081 #ifdef CONFIG_PCI_IOV
1082 /* disable SR-IOV for non MSI-X configurations */
1083 if (adapter->vf_data) {
1084 struct e1000_hw *hw = &adapter->hw;
1085 /* disable iov and allow time for transactions to clear */
1086 pci_disable_sriov(adapter->pdev);
1087 msleep(500);
1089 kfree(adapter->vf_data);
1090 adapter->vf_data = NULL;
1091 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1092 wrfl();
1093 msleep(100);
1094 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1096 #endif
1097 adapter->vfs_allocated_count = 0;
1098 adapter->rss_queues = 1;
1099 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1100 adapter->num_rx_queues = 1;
1101 adapter->num_tx_queues = 1;
1102 adapter->num_q_vectors = 1;
1103 if (!pci_enable_msi(adapter->pdev))
1104 adapter->flags |= IGB_FLAG_HAS_MSI;
1105 out:
1106 /* Notify the stack of the (possibly) reduced queue counts. */
1107 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1108 return netif_set_real_num_rx_queues(adapter->netdev,
1109 adapter->num_rx_queues);
1113 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1114 * @adapter: board private structure to initialize
1116 * We allocate one q_vector per queue interrupt. If allocation fails we
1117 * return -ENOMEM.
1119 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1121 struct igb_q_vector *q_vector;
1122 struct e1000_hw *hw = &adapter->hw;
1123 int v_idx;
1124 int orig_node = adapter->node;
1126 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1127 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1128 adapter->num_tx_queues)) &&
1129 (adapter->num_rx_queues == v_idx))
1130 adapter->node = orig_node;
1131 if (orig_node == -1) {
1132 int cur_node = next_online_node(adapter->node);
1133 if (cur_node == MAX_NUMNODES)
1134 cur_node = first_online_node;
1135 adapter->node = cur_node;
1137 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1138 adapter->node);
1139 if (!q_vector)
1140 q_vector = kzalloc(sizeof(struct igb_q_vector),
1141 GFP_KERNEL);
1142 if (!q_vector)
1143 goto err_out;
1144 q_vector->adapter = adapter;
1145 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1146 q_vector->itr_val = IGB_START_ITR;
1147 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1148 adapter->q_vector[v_idx] = q_vector;
1150 /* Restore the adapter's original node */
1151 adapter->node = orig_node;
1153 return 0;
1155 err_out:
1156 /* Restore the adapter's original node */
1157 adapter->node = orig_node;
1158 igb_free_q_vectors(adapter);
1159 return -ENOMEM;
1162 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1163 int ring_idx, int v_idx)
1165 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1167 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1168 q_vector->rx.ring->q_vector = q_vector;
1169 q_vector->rx.count++;
1170 q_vector->itr_val = adapter->rx_itr_setting;
1171 if (q_vector->itr_val && q_vector->itr_val <= 3)
1172 q_vector->itr_val = IGB_START_ITR;
1175 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1176 int ring_idx, int v_idx)
1178 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1180 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1181 q_vector->tx.ring->q_vector = q_vector;
1182 q_vector->tx.count++;
1183 q_vector->itr_val = adapter->tx_itr_setting;
1184 q_vector->tx.work_limit = adapter->tx_work_limit;
1185 if (q_vector->itr_val && q_vector->itr_val <= 3)
1186 q_vector->itr_val = IGB_START_ITR;
1190 * igb_map_ring_to_vector - maps allocated queues to vectors
1192 * This function maps the recently allocated queues to vectors.
1194 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1196 int i;
1197 int v_idx = 0;
1199 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1200 (adapter->num_q_vectors < adapter->num_tx_queues))
1201 return -ENOMEM;
1203 if (adapter->num_q_vectors >=
1204 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1205 for (i = 0; i < adapter->num_rx_queues; i++)
1206 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1207 for (i = 0; i < adapter->num_tx_queues; i++)
1208 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1209 } else {
1210 for (i = 0; i < adapter->num_rx_queues; i++) {
1211 if (i < adapter->num_tx_queues)
1212 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1213 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1215 for (; i < adapter->num_tx_queues; i++)
1216 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1218 return 0;
1222 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1224 * This function initializes the interrupts and allocates all of the queues.
1226 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1228 struct pci_dev *pdev = adapter->pdev;
1229 int err;
1231 err = igb_set_interrupt_capability(adapter);
1232 if (err)
1233 return err;
1235 err = igb_alloc_q_vectors(adapter);
1236 if (err) {
1237 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1238 goto err_alloc_q_vectors;
1241 err = igb_alloc_queues(adapter);
1242 if (err) {
1243 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1244 goto err_alloc_queues;
1247 err = igb_map_ring_to_vector(adapter);
1248 if (err) {
1249 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1250 goto err_map_queues;
1254 return 0;
1255 err_map_queues:
1256 igb_free_queues(adapter);
1257 err_alloc_queues:
1258 igb_free_q_vectors(adapter);
1259 err_alloc_q_vectors:
1260 igb_reset_interrupt_capability(adapter);
1261 return err;
1265 * igb_request_irq - initialize interrupts
1267 * Attempts to configure interrupts using the best available
1268 * capabilities of the hardware and kernel.
1270 static int igb_request_irq(struct igb_adapter *adapter)
1272 struct net_device *netdev = adapter->netdev;
1273 struct pci_dev *pdev = adapter->pdev;
1274 int err = 0;
1276 if (adapter->msix_entries) {
1277 err = igb_request_msix(adapter);
1278 if (!err)
1279 goto request_done;
1280 /* fall back to MSI */
1281 igb_clear_interrupt_scheme(adapter);
1282 if (!pci_enable_msi(pdev))
1283 adapter->flags |= IGB_FLAG_HAS_MSI;
1284 igb_free_all_tx_resources(adapter);
1285 igb_free_all_rx_resources(adapter);
1286 adapter->num_tx_queues = 1;
1287 adapter->num_rx_queues = 1;
1288 adapter->num_q_vectors = 1;
1289 err = igb_alloc_q_vectors(adapter);
1290 if (err) {
1291 dev_err(&pdev->dev,
1292 "Unable to allocate memory for vectors\n");
1293 goto request_done;
1295 err = igb_alloc_queues(adapter);
1296 if (err) {
1297 dev_err(&pdev->dev,
1298 "Unable to allocate memory for queues\n");
1299 igb_free_q_vectors(adapter);
1300 goto request_done;
1302 igb_setup_all_tx_resources(adapter);
1303 igb_setup_all_rx_resources(adapter);
1306 igb_assign_vector(adapter->q_vector[0], 0);
1308 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1309 err = request_irq(pdev->irq, igb_intr_msi, 0,
1310 netdev->name, adapter);
1311 if (!err)
1312 goto request_done;
1314 /* fall back to legacy interrupts */
1315 igb_reset_interrupt_capability(adapter);
1316 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1319 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1320 netdev->name, adapter);
1322 if (err)
1323 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1324 err);
1326 request_done:
1327 return err;
1330 static void igb_free_irq(struct igb_adapter *adapter)
1332 if (adapter->msix_entries) {
1333 int vector = 0, i;
1335 free_irq(adapter->msix_entries[vector++].vector, adapter);
1337 for (i = 0; i < adapter->num_q_vectors; i++)
1338 free_irq(adapter->msix_entries[vector++].vector,
1339 adapter->q_vector[i]);
1340 } else {
1341 free_irq(adapter->pdev->irq, adapter);
1346 * igb_irq_disable - Mask off interrupt generation on the NIC
1347 * @adapter: board private structure
1349 static void igb_irq_disable(struct igb_adapter *adapter)
1351 struct e1000_hw *hw = &adapter->hw;
1354 * we need to be careful when disabling interrupts. The VFs are also
1355 * mapped into these registers and so clearing the bits can cause
1356 * issues on the VF drivers so we only need to clear what we set
1358 if (adapter->msix_entries) {
1359 u32 regval = rd32(E1000_EIAM);
1360 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1361 wr32(E1000_EIMC, adapter->eims_enable_mask);
1362 regval = rd32(E1000_EIAC);
1363 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1366 wr32(E1000_IAM, 0);
1367 wr32(E1000_IMC, ~0);
1368 wrfl();
1369 if (adapter->msix_entries) {
1370 int i;
1371 for (i = 0; i < adapter->num_q_vectors; i++)
1372 synchronize_irq(adapter->msix_entries[i].vector);
1373 } else {
1374 synchronize_irq(adapter->pdev->irq);
1379 * igb_irq_enable - Enable default interrupt generation settings
1380 * @adapter: board private structure
1382 static void igb_irq_enable(struct igb_adapter *adapter)
1384 struct e1000_hw *hw = &adapter->hw;
1386 if (adapter->msix_entries) {
1387 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1388 u32 regval = rd32(E1000_EIAC);
1389 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1390 regval = rd32(E1000_EIAM);
1391 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1392 wr32(E1000_EIMS, adapter->eims_enable_mask);
1393 if (adapter->vfs_allocated_count) {
1394 wr32(E1000_MBVFIMR, 0xFF);
1395 ims |= E1000_IMS_VMMB;
1397 wr32(E1000_IMS, ims);
1398 } else {
1399 wr32(E1000_IMS, IMS_ENABLE_MASK |
1400 E1000_IMS_DRSTA);
1401 wr32(E1000_IAM, IMS_ENABLE_MASK |
1402 E1000_IMS_DRSTA);
1406 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1408 struct e1000_hw *hw = &adapter->hw;
1409 u16 vid = adapter->hw.mng_cookie.vlan_id;
1410 u16 old_vid = adapter->mng_vlan_id;
1412 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1413 /* add VID to filter table */
1414 igb_vfta_set(hw, vid, true);
1415 adapter->mng_vlan_id = vid;
1416 } else {
1417 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1420 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1421 (vid != old_vid) &&
1422 !test_bit(old_vid, adapter->active_vlans)) {
1423 /* remove VID from filter table */
1424 igb_vfta_set(hw, old_vid, false);
1429 * igb_release_hw_control - release control of the h/w to f/w
1430 * @adapter: address of board private structure
1432 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1433 * For ASF and Pass Through versions of f/w this means that the
1434 * driver is no longer loaded.
1437 static void igb_release_hw_control(struct igb_adapter *adapter)
1439 struct e1000_hw *hw = &adapter->hw;
1440 u32 ctrl_ext;
1442 /* Let firmware take over control of h/w */
1443 ctrl_ext = rd32(E1000_CTRL_EXT);
1444 wr32(E1000_CTRL_EXT,
1445 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1449 * igb_get_hw_control - get control of the h/w from f/w
1450 * @adapter: address of board private structure
1452 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1453 * For ASF and Pass Through versions of f/w this means that
1454 * the driver is loaded.
1457 static void igb_get_hw_control(struct igb_adapter *adapter)
1459 struct e1000_hw *hw = &adapter->hw;
1460 u32 ctrl_ext;
1462 /* Let firmware know the driver has taken over */
1463 ctrl_ext = rd32(E1000_CTRL_EXT);
1464 wr32(E1000_CTRL_EXT,
1465 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1469 * igb_configure - configure the hardware for RX and TX
1470 * @adapter: private board structure
1472 static void igb_configure(struct igb_adapter *adapter)
1474 struct net_device *netdev = adapter->netdev;
1475 int i;
1477 igb_get_hw_control(adapter);
1478 igb_set_rx_mode(netdev);
1480 igb_restore_vlan(adapter);
1482 igb_setup_tctl(adapter);
1483 igb_setup_mrqc(adapter);
1484 igb_setup_rctl(adapter);
1486 igb_configure_tx(adapter);
1487 igb_configure_rx(adapter);
1489 igb_rx_fifo_flush_82575(&adapter->hw);
1491 /* call igb_desc_unused which always leaves
1492 * at least 1 descriptor unused to make sure
1493 * next_to_use != next_to_clean */
1494 for (i = 0; i < adapter->num_rx_queues; i++) {
1495 struct igb_ring *ring = adapter->rx_ring[i];
1496 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1501 * igb_power_up_link - Power up the phy/serdes link
1502 * @adapter: address of board private structure
1504 void igb_power_up_link(struct igb_adapter *adapter)
1506 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1507 igb_power_up_phy_copper(&adapter->hw);
1508 else
1509 igb_power_up_serdes_link_82575(&adapter->hw);
1510 igb_reset_phy(&adapter->hw);
1514 * igb_power_down_link - Power down the phy/serdes link
1515 * @adapter: address of board private structure
1517 static void igb_power_down_link(struct igb_adapter *adapter)
1519 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1520 igb_power_down_phy_copper_82575(&adapter->hw);
1521 else
1522 igb_shutdown_serdes_link_82575(&adapter->hw);
1526 * igb_up - Open the interface and prepare it to handle traffic
1527 * @adapter: board private structure
1529 int igb_up(struct igb_adapter *adapter)
1531 struct e1000_hw *hw = &adapter->hw;
1532 int i;
1534 /* hardware has been reset, we need to reload some things */
1535 igb_configure(adapter);
1537 clear_bit(__IGB_DOWN, &adapter->state);
1539 for (i = 0; i < adapter->num_q_vectors; i++)
1540 napi_enable(&(adapter->q_vector[i]->napi));
1542 if (adapter->msix_entries)
1543 igb_configure_msix(adapter);
1544 else
1545 igb_assign_vector(adapter->q_vector[0], 0);
1547 /* Clear any pending interrupts. */
1548 rd32(E1000_ICR);
1549 igb_irq_enable(adapter);
1551 /* notify VFs that reset has been completed */
1552 if (adapter->vfs_allocated_count) {
1553 u32 reg_data = rd32(E1000_CTRL_EXT);
1554 reg_data |= E1000_CTRL_EXT_PFRSTD;
1555 wr32(E1000_CTRL_EXT, reg_data);
1558 netif_tx_start_all_queues(adapter->netdev);
1560 /* start the watchdog. */
1561 hw->mac.get_link_status = 1;
1562 schedule_work(&adapter->watchdog_task);
1564 return 0;
1567 void igb_down(struct igb_adapter *adapter)
1569 struct net_device *netdev = adapter->netdev;
1570 struct e1000_hw *hw = &adapter->hw;
1571 u32 tctl, rctl;
1572 int i;
1574 /* signal that we're down so the interrupt handler does not
1575 * reschedule our watchdog timer */
1576 set_bit(__IGB_DOWN, &adapter->state);
1578 /* disable receives in the hardware */
1579 rctl = rd32(E1000_RCTL);
1580 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1581 /* flush and sleep below */
1583 netif_tx_stop_all_queues(netdev);
1585 /* disable transmits in the hardware */
1586 tctl = rd32(E1000_TCTL);
1587 tctl &= ~E1000_TCTL_EN;
1588 wr32(E1000_TCTL, tctl);
1589 /* flush both disables and wait for them to finish */
1590 wrfl();
1591 msleep(10);
1593 for (i = 0; i < adapter->num_q_vectors; i++)
1594 napi_disable(&(adapter->q_vector[i]->napi));
1596 igb_irq_disable(adapter);
1598 del_timer_sync(&adapter->watchdog_timer);
1599 del_timer_sync(&adapter->phy_info_timer);
1601 netif_carrier_off(netdev);
1603 /* record the stats before reset*/
1604 spin_lock(&adapter->stats64_lock);
1605 igb_update_stats(adapter, &adapter->stats64);
1606 spin_unlock(&adapter->stats64_lock);
1608 adapter->link_speed = 0;
1609 adapter->link_duplex = 0;
1611 if (!pci_channel_offline(adapter->pdev))
1612 igb_reset(adapter);
1613 igb_clean_all_tx_rings(adapter);
1614 igb_clean_all_rx_rings(adapter);
1615 #ifdef CONFIG_IGB_DCA
1617 /* since we reset the hardware DCA settings were cleared */
1618 igb_setup_dca(adapter);
1619 #endif
/*
 * igb_reinit_locked - bring the interface down and back up
 *
 * Warns when called from interrupt context, since the function sleeps
 * while waiting.  The __IGB_RESETTING state bit serializes callers: the
 * loop sleeps in 1 ms steps until this caller is the one that set the bit,
 * and the bit is cleared again once igb_down()/igb_up() have run.
 */
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
1632 void igb_reset(struct igb_adapter *adapter)
1634 struct pci_dev *pdev = adapter->pdev;
1635 struct e1000_hw *hw = &adapter->hw;
1636 struct e1000_mac_info *mac = &hw->mac;
1637 struct e1000_fc_info *fc = &hw->fc;
1638 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1639 u16 hwm;
1641 /* Repartition Pba for greater than 9k mtu
1642 * To take effect CTRL.RST is required.
1644 switch (mac->type) {
1645 case e1000_i350:
1646 case e1000_82580:
1647 pba = rd32(E1000_RXPBS);
1648 pba = igb_rxpbs_adjust_82580(pba);
1649 break;
1650 case e1000_82576:
1651 pba = rd32(E1000_RXPBS);
1652 pba &= E1000_RXPBS_SIZE_MASK_82576;
1653 break;
1654 case e1000_82575:
1655 default:
1656 pba = E1000_PBA_34K;
1657 break;
1660 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1661 (mac->type < e1000_82576)) {
1662 /* adjust PBA for jumbo frames */
1663 wr32(E1000_PBA, pba);
1665 /* To maintain wire speed transmits, the Tx FIFO should be
1666 * large enough to accommodate two full transmit packets,
1667 * rounded up to the next 1KB and expressed in KB. Likewise,
1668 * the Rx FIFO should be large enough to accommodate at least
1669 * one full receive packet and is similarly rounded up and
1670 * expressed in KB. */
1671 pba = rd32(E1000_PBA);
1672 /* upper 16 bits has Tx packet buffer allocation size in KB */
1673 tx_space = pba >> 16;
1674 /* lower 16 bits has Rx packet buffer allocation size in KB */
1675 pba &= 0xffff;
1676 /* the tx fifo also stores 16 bytes of information about the tx
1677 * but don't include ethernet FCS because hardware appends it */
1678 min_tx_space = (adapter->max_frame_size +
1679 sizeof(union e1000_adv_tx_desc) -
1680 ETH_FCS_LEN) * 2;
1681 min_tx_space = ALIGN(min_tx_space, 1024);
1682 min_tx_space >>= 10;
1683 /* software strips receive CRC, so leave room for it */
1684 min_rx_space = adapter->max_frame_size;
1685 min_rx_space = ALIGN(min_rx_space, 1024);
1686 min_rx_space >>= 10;
1688 /* If current Tx allocation is less than the min Tx FIFO size,
1689 * and the min Tx FIFO size is less than the current Rx FIFO
1690 * allocation, take space away from current Rx allocation */
1691 if (tx_space < min_tx_space &&
1692 ((min_tx_space - tx_space) < pba)) {
1693 pba = pba - (min_tx_space - tx_space);
1695 /* if short on rx space, rx wins and must trump tx
1696 * adjustment */
1697 if (pba < min_rx_space)
1698 pba = min_rx_space;
1700 wr32(E1000_PBA, pba);
1703 /* flow control settings */
1704 /* The high water mark must be low enough to fit one full frame
1705 * (or the size used for early receive) above it in the Rx FIFO.
1706 * Set it to the lower of:
1707 * - 90% of the Rx FIFO size, or
1708 * - the full Rx FIFO size minus one full frame */
1709 hwm = min(((pba << 10) * 9 / 10),
1710 ((pba << 10) - 2 * adapter->max_frame_size));
1712 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1713 fc->low_water = fc->high_water - 16;
1714 fc->pause_time = 0xFFFF;
1715 fc->send_xon = 1;
1716 fc->current_mode = fc->requested_mode;
1718 /* disable receive for all VFs and wait one second */
1719 if (adapter->vfs_allocated_count) {
1720 int i;
1721 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1722 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1724 /* ping all the active vfs to let them know we are going down */
1725 igb_ping_all_vfs(adapter);
1727 /* disable transmits and receives */
1728 wr32(E1000_VFRE, 0);
1729 wr32(E1000_VFTE, 0);
1732 /* Allow time for pending master requests to run */
1733 hw->mac.ops.reset_hw(hw);
1734 wr32(E1000_WUC, 0);
1736 if (hw->mac.ops.init_hw(hw))
1737 dev_err(&pdev->dev, "Hardware Error\n");
1739 igb_init_dmac(adapter, pba);
1740 if (!netif_running(adapter->netdev))
1741 igb_power_down_link(adapter);
1743 igb_update_mng_vlan(adapter);
1745 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1748 igb_get_phy_info(hw);
1751 static netdev_features_t igb_fix_features(struct net_device *netdev,
1752 netdev_features_t features)
1755 * Since there is no support for separate rx/tx vlan accel
1756 * enable/disable make sure tx flag is always in same state as rx.
1758 if (features & NETIF_F_HW_VLAN_RX)
1759 features |= NETIF_F_HW_VLAN_TX;
1760 else
1761 features &= ~NETIF_F_HW_VLAN_TX;
1763 return features;
1766 static int igb_set_features(struct net_device *netdev,
1767 netdev_features_t features)
1769 netdev_features_t changed = netdev->features ^ features;
1771 if (changed & NETIF_F_HW_VLAN_RX)
1772 igb_vlan_mode(netdev, features);
1774 return 0;
/*
 * net_device callbacks for igb; igb_probe() installs this table as
 * netdev->netdev_ops.  The poll controller hook is only present when the
 * kernel is built with CONFIG_NET_POLL_CONTROLLER.
 */
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
1802 * igb_probe - Device Initialization Routine
1803 * @pdev: PCI device information struct
1804 * @ent: entry in igb_pci_tbl
1806 * Returns 0 on success, negative on failure
1808 * igb_probe initializes an adapter identified by a pci_dev structure.
1809 * The OS initialization, configuring of the adapter private structure,
1810 * and a hardware reset occur.
1812 static int __devinit igb_probe(struct pci_dev *pdev,
1813 const struct pci_device_id *ent)
1815 struct net_device *netdev;
1816 struct igb_adapter *adapter;
1817 struct e1000_hw *hw;
1818 u16 eeprom_data = 0;
1819 s32 ret_val;
1820 static int global_quad_port_a; /* global quad port a indication */
1821 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1822 unsigned long mmio_start, mmio_len;
1823 int err, pci_using_dac;
1824 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1825 u8 part_str[E1000_PBANUM_LENGTH];
1827 /* Catch broken hardware that put the wrong VF device ID in
1828 * the PCIe SR-IOV capability.
1830 if (pdev->is_virtfn) {
1831 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1832 pci_name(pdev), pdev->vendor, pdev->device);
1833 return -EINVAL;
1836 err = pci_enable_device_mem(pdev);
1837 if (err)
1838 return err;
1840 pci_using_dac = 0;
1841 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1842 if (!err) {
1843 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1844 if (!err)
1845 pci_using_dac = 1;
1846 } else {
1847 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1848 if (err) {
1849 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1850 if (err) {
1851 dev_err(&pdev->dev, "No usable DMA "
1852 "configuration, aborting\n");
1853 goto err_dma;
1858 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1859 IORESOURCE_MEM),
1860 igb_driver_name);
1861 if (err)
1862 goto err_pci_reg;
1864 pci_enable_pcie_error_reporting(pdev);
1866 pci_set_master(pdev);
1867 pci_save_state(pdev);
1869 err = -ENOMEM;
1870 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1871 IGB_MAX_TX_QUEUES);
1872 if (!netdev)
1873 goto err_alloc_etherdev;
1875 SET_NETDEV_DEV(netdev, &pdev->dev);
1877 pci_set_drvdata(pdev, netdev);
1878 adapter = netdev_priv(netdev);
1879 adapter->netdev = netdev;
1880 adapter->pdev = pdev;
1881 hw = &adapter->hw;
1882 hw->back = adapter;
1883 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1885 mmio_start = pci_resource_start(pdev, 0);
1886 mmio_len = pci_resource_len(pdev, 0);
1888 err = -EIO;
1889 hw->hw_addr = ioremap(mmio_start, mmio_len);
1890 if (!hw->hw_addr)
1891 goto err_ioremap;
1893 netdev->netdev_ops = &igb_netdev_ops;
1894 igb_set_ethtool_ops(netdev);
1895 netdev->watchdog_timeo = 5 * HZ;
1897 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1899 netdev->mem_start = mmio_start;
1900 netdev->mem_end = mmio_start + mmio_len;
1902 /* PCI config space info */
1903 hw->vendor_id = pdev->vendor;
1904 hw->device_id = pdev->device;
1905 hw->revision_id = pdev->revision;
1906 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1907 hw->subsystem_device_id = pdev->subsystem_device;
1909 /* Copy the default MAC, PHY and NVM function pointers */
1910 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1911 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1912 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1913 /* Initialize skew-specific constants */
1914 err = ei->get_invariants(hw);
1915 if (err)
1916 goto err_sw_init;
1918 /* setup the private structure */
1919 err = igb_sw_init(adapter);
1920 if (err)
1921 goto err_sw_init;
1923 igb_get_bus_info_pcie(hw);
1925 hw->phy.autoneg_wait_to_complete = false;
1927 /* Copper options */
1928 if (hw->phy.media_type == e1000_media_type_copper) {
1929 hw->phy.mdix = AUTO_ALL_MODES;
1930 hw->phy.disable_polarity_correction = false;
1931 hw->phy.ms_type = e1000_ms_hw_default;
1934 if (igb_check_reset_block(hw))
1935 dev_info(&pdev->dev,
1936 "PHY reset is blocked due to SOL/IDER session.\n");
1939 * features is initialized to 0 in allocation, it might have bits
1940 * set by igb_sw_init so we should use an or instead of an
1941 * assignment.
1943 netdev->features |= NETIF_F_SG |
1944 NETIF_F_IP_CSUM |
1945 NETIF_F_IPV6_CSUM |
1946 NETIF_F_TSO |
1947 NETIF_F_TSO6 |
1948 NETIF_F_RXHASH |
1949 NETIF_F_RXCSUM |
1950 NETIF_F_HW_VLAN_RX |
1951 NETIF_F_HW_VLAN_TX;
1953 /* copy netdev features into list of user selectable features */
1954 netdev->hw_features |= netdev->features;
1956 /* set this bit last since it cannot be part of hw_features */
1957 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1959 netdev->vlan_features |= NETIF_F_TSO |
1960 NETIF_F_TSO6 |
1961 NETIF_F_IP_CSUM |
1962 NETIF_F_IPV6_CSUM |
1963 NETIF_F_SG;
1965 if (pci_using_dac) {
1966 netdev->features |= NETIF_F_HIGHDMA;
1967 netdev->vlan_features |= NETIF_F_HIGHDMA;
1970 if (hw->mac.type >= e1000_82576) {
1971 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1972 netdev->features |= NETIF_F_SCTP_CSUM;
1975 netdev->priv_flags |= IFF_UNICAST_FLT;
1977 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1979 /* before reading the NVM, reset the controller to put the device in a
1980 * known good starting state */
1981 hw->mac.ops.reset_hw(hw);
1983 /* make sure the NVM is good */
1984 if (hw->nvm.ops.validate(hw) < 0) {
1985 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1986 err = -EIO;
1987 goto err_eeprom;
1990 /* copy the MAC address out of the NVM */
1991 if (hw->mac.ops.read_mac_addr(hw))
1992 dev_err(&pdev->dev, "NVM Read Error\n");
1994 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1995 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1997 if (!is_valid_ether_addr(netdev->perm_addr)) {
1998 dev_err(&pdev->dev, "Invalid MAC Address\n");
1999 err = -EIO;
2000 goto err_eeprom;
2003 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2004 (unsigned long) adapter);
2005 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2006 (unsigned long) adapter);
2008 INIT_WORK(&adapter->reset_task, igb_reset_task);
2009 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2011 /* Initialize link properties that are user-changeable */
2012 adapter->fc_autoneg = true;
2013 hw->mac.autoneg = true;
2014 hw->phy.autoneg_advertised = 0x2f;
2016 hw->fc.requested_mode = e1000_fc_default;
2017 hw->fc.current_mode = e1000_fc_default;
2019 igb_validate_mdi_setting(hw);
2021 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2022 * enable the ACPI Magic Packet filter
2025 if (hw->bus.func == 0)
2026 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2027 else if (hw->mac.type >= e1000_82580)
2028 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2029 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2030 &eeprom_data);
2031 else if (hw->bus.func == 1)
2032 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2034 if (eeprom_data & eeprom_apme_mask)
2035 adapter->eeprom_wol |= E1000_WUFC_MAG;
2037 /* now that we have the eeprom settings, apply the special cases where
2038 * the eeprom may be wrong or the board simply won't support wake on
2039 * lan on a particular port */
2040 switch (pdev->device) {
2041 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2042 adapter->eeprom_wol = 0;
2043 break;
2044 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2045 case E1000_DEV_ID_82576_FIBER:
2046 case E1000_DEV_ID_82576_SERDES:
2047 /* Wake events only supported on port A for dual fiber
2048 * regardless of eeprom setting */
2049 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2050 adapter->eeprom_wol = 0;
2051 break;
2052 case E1000_DEV_ID_82576_QUAD_COPPER:
2053 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2054 /* if quad port adapter, disable WoL on all but port A */
2055 if (global_quad_port_a != 0)
2056 adapter->eeprom_wol = 0;
2057 else
2058 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2059 /* Reset for multiple quad port adapters */
2060 if (++global_quad_port_a == 4)
2061 global_quad_port_a = 0;
2062 break;
2065 /* initialize the wol settings based on the eeprom settings */
2066 adapter->wol = adapter->eeprom_wol;
2067 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2069 /* reset the hardware with the new settings */
2070 igb_reset(adapter);
2072 /* let the f/w know that the h/w is now under the control of the
2073 * driver. */
2074 igb_get_hw_control(adapter);
2076 strcpy(netdev->name, "eth%d");
2077 err = register_netdev(netdev);
2078 if (err)
2079 goto err_register;
2081 /* carrier off reporting is important to ethtool even BEFORE open */
2082 netif_carrier_off(netdev);
2084 #ifdef CONFIG_IGB_DCA
2085 if (dca_add_requester(&pdev->dev) == 0) {
2086 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087 dev_info(&pdev->dev, "DCA enabled\n");
2088 igb_setup_dca(adapter);
2091 #endif
2092 /* do hw tstamp init after resetting */
2093 igb_init_hw_timer(adapter);
2095 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096 /* print bus type/speed/width info */
2097 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2098 netdev->name,
2099 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2101 "unknown"),
2102 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2105 "unknown"),
2106 netdev->dev_addr);
2108 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2109 if (ret_val)
2110 strcpy(part_str, "Unknown");
2111 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112 dev_info(&pdev->dev,
2113 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114 adapter->msix_entries ? "MSI-X" :
2115 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116 adapter->num_rx_queues, adapter->num_tx_queues);
2117 switch (hw->mac.type) {
2118 case e1000_i350:
2119 igb_set_eee_i350(hw);
2120 break;
2121 default:
2122 break;
2125 pm_runtime_put_noidle(&pdev->dev);
2126 return 0;
2128 err_register:
2129 igb_release_hw_control(adapter);
2130 err_eeprom:
2131 if (!igb_check_reset_block(hw))
2132 igb_reset_phy(hw);
2134 if (hw->flash_address)
2135 iounmap(hw->flash_address);
2136 err_sw_init:
2137 igb_clear_interrupt_scheme(adapter);
2138 iounmap(hw->hw_addr);
2139 err_ioremap:
2140 free_netdev(netdev);
2141 err_alloc_etherdev:
2142 pci_release_selected_regions(pdev,
2143 pci_select_bars(pdev, IORESOURCE_MEM));
2144 err_pci_reg:
2145 err_dma:
2146 pci_disable_device(pdev);
2147 return err;
2151 * igb_remove - Device Removal Routine
2152 * @pdev: PCI device information struct
2154 * igb_remove is called by the PCI subsystem to alert the driver
2155 * that it should release a PCI device. The could be caused by a
2156 * Hot-Plug event, or because the driver is going to be removed from
2157 * memory.
2159 static void __devexit igb_remove(struct pci_dev *pdev)
2161 struct net_device *netdev = pci_get_drvdata(pdev);
2162 struct igb_adapter *adapter = netdev_priv(netdev);
2163 struct e1000_hw *hw = &adapter->hw;
2165 pm_runtime_get_noresume(&pdev->dev);
2168 * The watchdog timer may be rescheduled, so explicitly
2169 * disable watchdog from being rescheduled.
2171 set_bit(__IGB_DOWN, &adapter->state);
2172 del_timer_sync(&adapter->watchdog_timer);
2173 del_timer_sync(&adapter->phy_info_timer);
2175 cancel_work_sync(&adapter->reset_task);
2176 cancel_work_sync(&adapter->watchdog_task);
2178 #ifdef CONFIG_IGB_DCA
2179 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2180 dev_info(&pdev->dev, "DCA disabled\n");
2181 dca_remove_requester(&pdev->dev);
2182 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2183 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2185 #endif
2187 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2188 * would have already happened in close and is redundant. */
2189 igb_release_hw_control(adapter);
2191 unregister_netdev(netdev);
2193 igb_clear_interrupt_scheme(adapter);
2195 #ifdef CONFIG_PCI_IOV
2196 /* reclaim resources allocated to VFs */
2197 if (adapter->vf_data) {
2198 /* disable iov and allow time for transactions to clear */
2199 if (!igb_check_vf_assignment(adapter)) {
2200 pci_disable_sriov(pdev);
2201 msleep(500);
2202 } else {
2203 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2206 kfree(adapter->vf_data);
2207 adapter->vf_data = NULL;
2208 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2209 wrfl();
2210 msleep(100);
2211 dev_info(&pdev->dev, "IOV Disabled\n");
2213 #endif
2215 iounmap(hw->hw_addr);
2216 if (hw->flash_address)
2217 iounmap(hw->flash_address);
2218 pci_release_selected_regions(pdev,
2219 pci_select_bars(pdev, IORESOURCE_MEM));
2221 kfree(adapter->shadow_vfta);
2222 free_netdev(netdev);
2224 pci_disable_pcie_error_reporting(pdev);
2226 pci_disable_device(pdev);
2230 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2231 * @adapter: board private structure to initialize
2233 * This function initializes the vf specific data storage and then attempts to
2234 * allocate the VFs. The reason for ordering it this way is because it is much
2235 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2236 * the memory for the VFs.
2238 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2240 #ifdef CONFIG_PCI_IOV
2241 struct pci_dev *pdev = adapter->pdev;
2242 int old_vfs = igb_find_enabled_vfs(adapter);
2243 int i;
2245 if (old_vfs) {
2246 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2247 "max_vfs setting of %d\n", old_vfs, max_vfs);
2248 adapter->vfs_allocated_count = old_vfs;
2251 if (!adapter->vfs_allocated_count)
2252 return;
2254 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2255 sizeof(struct vf_data_storage), GFP_KERNEL);
2256 /* if allocation failed then we do not support SR-IOV */
2257 if (!adapter->vf_data) {
2258 adapter->vfs_allocated_count = 0;
2259 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2260 "Data Storage\n");
2261 goto out;
2264 if (!old_vfs) {
2265 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2266 goto err_out;
2268 dev_info(&pdev->dev, "%d VFs allocated\n",
2269 adapter->vfs_allocated_count);
2270 for (i = 0; i < adapter->vfs_allocated_count; i++)
2271 igb_vf_configure(adapter, i);
2273 /* DMA Coalescing is not supported in IOV mode. */
2274 adapter->flags &= ~IGB_FLAG_DMAC;
2275 goto out;
2276 err_out:
2277 kfree(adapter->vf_data);
2278 adapter->vf_data = NULL;
2279 adapter->vfs_allocated_count = 0;
2280 out:
2281 return;
2282 #endif /* CONFIG_PCI_IOV */
2286 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2287 * @adapter: board private structure to initialize
2289 * igb_init_hw_timer initializes the function pointer and values for the hw
2290 * timer found in hardware.
2292 static void igb_init_hw_timer(struct igb_adapter *adapter)
2294 struct e1000_hw *hw = &adapter->hw;
2296 switch (hw->mac.type) {
2297 case e1000_i350:
2298 case e1000_82580:
2299 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2300 adapter->cycles.read = igb_read_clock;
2301 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2302 adapter->cycles.mult = 1;
2304 * The 82580 timesync updates the system timer every 8ns by 8ns
2305 * and the value cannot be shifted. Instead we need to shift
2306 * the registers to generate a 64bit timer value. As a result
2307 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2308 * 24 in order to generate a larger value for synchronization.
2310 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2311 /* disable system timer temporarily by setting bit 31 */
2312 wr32(E1000_TSAUXC, 0x80000000);
2313 wrfl();
2315 /* Set registers so that rollover occurs soon to test this. */
2316 wr32(E1000_SYSTIMR, 0x00000000);
2317 wr32(E1000_SYSTIML, 0x80000000);
2318 wr32(E1000_SYSTIMH, 0x000000FF);
2319 wrfl();
2321 /* enable system timer by clearing bit 31 */
2322 wr32(E1000_TSAUXC, 0x0);
2323 wrfl();
2325 timecounter_init(&adapter->clock,
2326 &adapter->cycles,
2327 ktime_to_ns(ktime_get_real()));
2329 * Synchronize our NIC clock against system wall clock. NIC
2330 * time stamp reading requires ~3us per sample, each sample
2331 * was pretty stable even under load => only require 10
2332 * samples for each offset comparison.
2334 memset(&adapter->compare, 0, sizeof(adapter->compare));
2335 adapter->compare.source = &adapter->clock;
2336 adapter->compare.target = ktime_get_real;
2337 adapter->compare.num_samples = 10;
2338 timecompare_update(&adapter->compare, 0);
2339 break;
2340 case e1000_82576:
2342 * Initialize hardware timer: we keep it running just in case
2343 * that some program needs it later on.
2345 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2346 adapter->cycles.read = igb_read_clock;
2347 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2348 adapter->cycles.mult = 1;
2350 * Scale the NIC clock cycle by a large factor so that
2351 * relatively small clock corrections can be added or
2352 * subtracted at each clock tick. The drawbacks of a large
2353 * factor are a) that the clock register overflows more quickly
2354 * (not such a big deal) and b) that the increment per tick has
2355 * to fit into 24 bits. As a result we need to use a shift of
2356 * 19 so we can fit a value of 16 into the TIMINCA register.
2358 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2359 wr32(E1000_TIMINCA,
2360 (1 << E1000_TIMINCA_16NS_SHIFT) |
2361 (16 << IGB_82576_TSYNC_SHIFT));
2363 /* Set registers so that rollover occurs soon to test this. */
2364 wr32(E1000_SYSTIML, 0x00000000);
2365 wr32(E1000_SYSTIMH, 0xFF800000);
2366 wrfl();
2368 timecounter_init(&adapter->clock,
2369 &adapter->cycles,
2370 ktime_to_ns(ktime_get_real()));
2372 * Synchronize our NIC clock against system wall clock. NIC
2373 * time stamp reading requires ~3us per sample, each sample
2374 * was pretty stable even under load => only require 10
2375 * samples for each offset comparison.
2377 memset(&adapter->compare, 0, sizeof(adapter->compare));
2378 adapter->compare.source = &adapter->clock;
2379 adapter->compare.target = ktime_get_real;
2380 adapter->compare.num_samples = 10;
2381 timecompare_update(&adapter->compare, 0);
2382 break;
2383 case e1000_82575:
2384 /* 82575 does not support timesync */
2385 default:
2386 break;
2392 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2393 * @adapter: board private structure to initialize
2395 * igb_sw_init initializes the Adapter private data structure.
2396 * Fields are initialized based on PCI device information and
2397 * OS network device settings (MTU size).
2399 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2401 struct e1000_hw *hw = &adapter->hw;
2402 struct net_device *netdev = adapter->netdev;
2403 struct pci_dev *pdev = adapter->pdev;
2405 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2407 /* set default ring sizes */
2408 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2409 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2411 /* set default ITR values */
2412 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2413 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2415 /* set default work limits */
2416 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2418 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2419 VLAN_HLEN;
2420 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2422 adapter->node = -1;
2424 spin_lock_init(&adapter->stats64_lock);
2425 #ifdef CONFIG_PCI_IOV
2426 switch (hw->mac.type) {
2427 case e1000_82576:
2428 case e1000_i350:
2429 if (max_vfs > 7) {
2430 dev_warn(&pdev->dev,
2431 "Maximum of 7 VFs per PF, using max\n");
2432 adapter->vfs_allocated_count = 7;
2433 } else
2434 adapter->vfs_allocated_count = max_vfs;
2435 break;
2436 default:
2437 break;
2439 #endif /* CONFIG_PCI_IOV */
2440 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2441 /* i350 cannot do RSS and SR-IOV at the same time */
2442 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2443 adapter->rss_queues = 1;
2446 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2447 * then we should combine the queues into a queue pair in order to
2448 * conserve interrupts due to limited supply
2450 if ((adapter->rss_queues > 4) ||
2451 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2452 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2454 /* Setup and initialize a copy of the hw vlan table array */
2455 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2456 E1000_VLAN_FILTER_TBL_SIZE,
2457 GFP_ATOMIC);
2459 /* This call may decrease the number of queues */
2460 if (igb_init_interrupt_scheme(adapter)) {
2461 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2462 return -ENOMEM;
2465 igb_probe_vfs(adapter);
2467 /* Explicitly disable IRQ since the NIC can be in any state. */
2468 igb_irq_disable(adapter);
2470 if (hw->mac.type == e1000_i350)
2471 adapter->flags &= ~IGB_FLAG_DMAC;
2473 set_bit(__IGB_DOWN, &adapter->state);
2474 return 0;
2478 * igb_open - Called when a network interface is made active
2479 * @netdev: network interface device structure
2481 * Returns 0 on success, negative value on failure
2483 * The open entry point is called when a network interface is made
2484 * active by the system (IFF_UP). At this point all resources needed
2485 * for transmit and receive operations are allocated, the interrupt
2486 * handler is registered with the OS, the watchdog timer is started,
2487 * and the stack is notified that the interface is ready.
2489 static int __igb_open(struct net_device *netdev, bool resuming)
2491 struct igb_adapter *adapter = netdev_priv(netdev);
2492 struct e1000_hw *hw = &adapter->hw;
2493 struct pci_dev *pdev = adapter->pdev;
2494 int err;
2495 int i;
2497 /* disallow open during test */
2498 if (test_bit(__IGB_TESTING, &adapter->state)) {
2499 WARN_ON(resuming);
2500 return -EBUSY;
2503 if (!resuming)
2504 pm_runtime_get_sync(&pdev->dev);
2506 netif_carrier_off(netdev);
2508 /* allocate transmit descriptors */
2509 err = igb_setup_all_tx_resources(adapter);
2510 if (err)
2511 goto err_setup_tx;
2513 /* allocate receive descriptors */
2514 err = igb_setup_all_rx_resources(adapter);
2515 if (err)
2516 goto err_setup_rx;
2518 igb_power_up_link(adapter);
2520 /* before we allocate an interrupt, we must be ready to handle it.
2521 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2522 * as soon as we call pci_request_irq, so we have to setup our
2523 * clean_rx handler before we do so. */
2524 igb_configure(adapter);
2526 err = igb_request_irq(adapter);
2527 if (err)
2528 goto err_req_irq;
2530 /* From here on the code is the same as igb_up() */
2531 clear_bit(__IGB_DOWN, &adapter->state);
2533 for (i = 0; i < adapter->num_q_vectors; i++)
2534 napi_enable(&(adapter->q_vector[i]->napi));
2536 /* Clear any pending interrupts. */
2537 rd32(E1000_ICR);
2539 igb_irq_enable(adapter);
2541 /* notify VFs that reset has been completed */
2542 if (adapter->vfs_allocated_count) {
2543 u32 reg_data = rd32(E1000_CTRL_EXT);
2544 reg_data |= E1000_CTRL_EXT_PFRSTD;
2545 wr32(E1000_CTRL_EXT, reg_data);
2548 netif_tx_start_all_queues(netdev);
2550 if (!resuming)
2551 pm_runtime_put(&pdev->dev);
2553 /* start the watchdog. */
2554 hw->mac.get_link_status = 1;
2555 schedule_work(&adapter->watchdog_task);
2557 return 0;
2559 err_req_irq:
2560 igb_release_hw_control(adapter);
2561 igb_power_down_link(adapter);
2562 igb_free_all_rx_resources(adapter);
2563 err_setup_rx:
2564 igb_free_all_tx_resources(adapter);
2565 err_setup_tx:
2566 igb_reset(adapter);
2567 if (!resuming)
2568 pm_runtime_put(&pdev->dev);
2570 return err;
2573 static int igb_open(struct net_device *netdev)
2575 return __igb_open(netdev, false);
2579 * igb_close - Disables a network interface
2580 * @netdev: network interface device structure
2582 * Returns 0, this is not allowed to fail
2584 * The close entry point is called when an interface is de-activated
2585 * by the OS. The hardware is still under the driver's control, but
2586 * needs to be disabled. A global MAC reset is issued to stop the
2587 * hardware, and all transmit and receive resources are freed.
2589 static int __igb_close(struct net_device *netdev, bool suspending)
2591 struct igb_adapter *adapter = netdev_priv(netdev);
2592 struct pci_dev *pdev = adapter->pdev;
2594 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2596 if (!suspending)
2597 pm_runtime_get_sync(&pdev->dev);
2599 igb_down(adapter);
2600 igb_free_irq(adapter);
2602 igb_free_all_tx_resources(adapter);
2603 igb_free_all_rx_resources(adapter);
2605 if (!suspending)
2606 pm_runtime_put_sync(&pdev->dev);
2607 return 0;
2610 static int igb_close(struct net_device *netdev)
2612 return __igb_close(netdev, false);
2616 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2617 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2619 * Return 0 on success, negative on failure
2621 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2623 struct device *dev = tx_ring->dev;
2624 int orig_node = dev_to_node(dev);
2625 int size;
2627 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2628 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2629 if (!tx_ring->tx_buffer_info)
2630 tx_ring->tx_buffer_info = vzalloc(size);
2631 if (!tx_ring->tx_buffer_info)
2632 goto err;
2634 /* round up to nearest 4K */
2635 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2636 tx_ring->size = ALIGN(tx_ring->size, 4096);
2638 set_dev_node(dev, tx_ring->numa_node);
2639 tx_ring->desc = dma_alloc_coherent(dev,
2640 tx_ring->size,
2641 &tx_ring->dma,
2642 GFP_KERNEL);
2643 set_dev_node(dev, orig_node);
2644 if (!tx_ring->desc)
2645 tx_ring->desc = dma_alloc_coherent(dev,
2646 tx_ring->size,
2647 &tx_ring->dma,
2648 GFP_KERNEL);
2650 if (!tx_ring->desc)
2651 goto err;
2653 tx_ring->next_to_use = 0;
2654 tx_ring->next_to_clean = 0;
2656 return 0;
2658 err:
2659 vfree(tx_ring->tx_buffer_info);
2660 dev_err(dev,
2661 "Unable to allocate memory for the transmit descriptor ring\n");
2662 return -ENOMEM;
2666 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2667 * (Descriptors) for all queues
2668 * @adapter: board private structure
2670 * Return 0 on success, negative on failure
2672 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2674 struct pci_dev *pdev = adapter->pdev;
2675 int i, err = 0;
2677 for (i = 0; i < adapter->num_tx_queues; i++) {
2678 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2679 if (err) {
2680 dev_err(&pdev->dev,
2681 "Allocation for Tx Queue %u failed\n", i);
2682 for (i--; i >= 0; i--)
2683 igb_free_tx_resources(adapter->tx_ring[i]);
2684 break;
2688 return err;
2692 * igb_setup_tctl - configure the transmit control registers
2693 * @adapter: Board private structure
2695 void igb_setup_tctl(struct igb_adapter *adapter)
2697 struct e1000_hw *hw = &adapter->hw;
2698 u32 tctl;
2700 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2701 wr32(E1000_TXDCTL(0), 0);
2703 /* Program the Transmit Control Register */
2704 tctl = rd32(E1000_TCTL);
2705 tctl &= ~E1000_TCTL_CT;
2706 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2707 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2709 igb_config_collision_dist(hw);
2711 /* Enable transmits */
2712 tctl |= E1000_TCTL_EN;
2714 wr32(E1000_TCTL, tctl);
2718 * igb_configure_tx_ring - Configure transmit ring after Reset
2719 * @adapter: board private structure
2720 * @ring: tx ring to configure
2722 * Configure a transmit ring after a reset.
2724 void igb_configure_tx_ring(struct igb_adapter *adapter,
2725 struct igb_ring *ring)
2727 struct e1000_hw *hw = &adapter->hw;
2728 u32 txdctl = 0;
2729 u64 tdba = ring->dma;
2730 int reg_idx = ring->reg_idx;
2732 /* disable the queue */
2733 wr32(E1000_TXDCTL(reg_idx), 0);
2734 wrfl();
2735 mdelay(10);
2737 wr32(E1000_TDLEN(reg_idx),
2738 ring->count * sizeof(union e1000_adv_tx_desc));
2739 wr32(E1000_TDBAL(reg_idx),
2740 tdba & 0x00000000ffffffffULL);
2741 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2743 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2744 wr32(E1000_TDH(reg_idx), 0);
2745 writel(0, ring->tail);
2747 txdctl |= IGB_TX_PTHRESH;
2748 txdctl |= IGB_TX_HTHRESH << 8;
2749 txdctl |= IGB_TX_WTHRESH << 16;
2751 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2752 wr32(E1000_TXDCTL(reg_idx), txdctl);
2756 * igb_configure_tx - Configure transmit Unit after Reset
2757 * @adapter: board private structure
2759 * Configure the Tx unit of the MAC after a reset.
2761 static void igb_configure_tx(struct igb_adapter *adapter)
2763 int i;
2765 for (i = 0; i < adapter->num_tx_queues; i++)
2766 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2770 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2771 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2773 * Returns 0 on success, negative on failure
2775 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2777 struct device *dev = rx_ring->dev;
2778 int orig_node = dev_to_node(dev);
2779 int size, desc_len;
2781 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2782 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2783 if (!rx_ring->rx_buffer_info)
2784 rx_ring->rx_buffer_info = vzalloc(size);
2785 if (!rx_ring->rx_buffer_info)
2786 goto err;
2788 desc_len = sizeof(union e1000_adv_rx_desc);
2790 /* Round up to nearest 4K */
2791 rx_ring->size = rx_ring->count * desc_len;
2792 rx_ring->size = ALIGN(rx_ring->size, 4096);
2794 set_dev_node(dev, rx_ring->numa_node);
2795 rx_ring->desc = dma_alloc_coherent(dev,
2796 rx_ring->size,
2797 &rx_ring->dma,
2798 GFP_KERNEL);
2799 set_dev_node(dev, orig_node);
2800 if (!rx_ring->desc)
2801 rx_ring->desc = dma_alloc_coherent(dev,
2802 rx_ring->size,
2803 &rx_ring->dma,
2804 GFP_KERNEL);
2806 if (!rx_ring->desc)
2807 goto err;
2809 rx_ring->next_to_clean = 0;
2810 rx_ring->next_to_use = 0;
2812 return 0;
2814 err:
2815 vfree(rx_ring->rx_buffer_info);
2816 rx_ring->rx_buffer_info = NULL;
2817 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2818 " ring\n");
2819 return -ENOMEM;
2823 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2824 * (Descriptors) for all queues
2825 * @adapter: board private structure
2827 * Return 0 on success, negative on failure
2829 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2831 struct pci_dev *pdev = adapter->pdev;
2832 int i, err = 0;
2834 for (i = 0; i < adapter->num_rx_queues; i++) {
2835 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2836 if (err) {
2837 dev_err(&pdev->dev,
2838 "Allocation for Rx Queue %u failed\n", i);
2839 for (i--; i >= 0; i--)
2840 igb_free_rx_resources(adapter->rx_ring[i]);
2841 break;
2845 return err;
2849 * igb_setup_mrqc - configure the multiple receive queue control registers
2850 * @adapter: Board private structure
2852 static void igb_setup_mrqc(struct igb_adapter *adapter)
2854 struct e1000_hw *hw = &adapter->hw;
2855 u32 mrqc, rxcsum;
2856 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2857 union e1000_reta {
2858 u32 dword;
2859 u8 bytes[4];
2860 } reta;
2861 static const u8 rsshash[40] = {
2862 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2863 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2864 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2865 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2867 /* Fill out hash function seeds */
2868 for (j = 0; j < 10; j++) {
2869 u32 rsskey = rsshash[(j * 4)];
2870 rsskey |= rsshash[(j * 4) + 1] << 8;
2871 rsskey |= rsshash[(j * 4) + 2] << 16;
2872 rsskey |= rsshash[(j * 4) + 3] << 24;
2873 array_wr32(E1000_RSSRK(0), j, rsskey);
2876 num_rx_queues = adapter->rss_queues;
2878 if (adapter->vfs_allocated_count) {
2879 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2880 switch (hw->mac.type) {
2881 case e1000_i350:
2882 case e1000_82580:
2883 num_rx_queues = 1;
2884 shift = 0;
2885 break;
2886 case e1000_82576:
2887 shift = 3;
2888 num_rx_queues = 2;
2889 break;
2890 case e1000_82575:
2891 shift = 2;
2892 shift2 = 6;
2893 default:
2894 break;
2896 } else {
2897 if (hw->mac.type == e1000_82575)
2898 shift = 6;
2901 for (j = 0; j < (32 * 4); j++) {
2902 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2903 if (shift2)
2904 reta.bytes[j & 3] |= num_rx_queues << shift2;
2905 if ((j & 3) == 3)
2906 wr32(E1000_RETA(j >> 2), reta.dword);
2910 * Disable raw packet checksumming so that RSS hash is placed in
2911 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2912 * offloads as they are enabled by default
2914 rxcsum = rd32(E1000_RXCSUM);
2915 rxcsum |= E1000_RXCSUM_PCSD;
2917 if (adapter->hw.mac.type >= e1000_82576)
2918 /* Enable Receive Checksum Offload for SCTP */
2919 rxcsum |= E1000_RXCSUM_CRCOFL;
2921 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2922 wr32(E1000_RXCSUM, rxcsum);
2924 /* If VMDq is enabled then we set the appropriate mode for that, else
2925 * we default to RSS so that an RSS hash is calculated per packet even
2926 * if we are only using one queue */
2927 if (adapter->vfs_allocated_count) {
2928 if (hw->mac.type > e1000_82575) {
2929 /* Set the default pool for the PF's first queue */
2930 u32 vtctl = rd32(E1000_VT_CTL);
2931 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2932 E1000_VT_CTL_DISABLE_DEF_POOL);
2933 vtctl |= adapter->vfs_allocated_count <<
2934 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2935 wr32(E1000_VT_CTL, vtctl);
2937 if (adapter->rss_queues > 1)
2938 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2939 else
2940 mrqc = E1000_MRQC_ENABLE_VMDQ;
2941 } else {
2942 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2944 igb_vmm_control(adapter);
2947 * Generate RSS hash based on TCP port numbers and/or
2948 * IPv4/v6 src and dst addresses since UDP cannot be
2949 * hashed reliably due to IP fragmentation
2951 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2952 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2953 E1000_MRQC_RSS_FIELD_IPV6 |
2954 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2955 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2957 wr32(E1000_MRQC, mrqc);
2961 * igb_setup_rctl - configure the receive control registers
2962 * @adapter: Board private structure
2964 void igb_setup_rctl(struct igb_adapter *adapter)
2966 struct e1000_hw *hw = &adapter->hw;
2967 u32 rctl;
2969 rctl = rd32(E1000_RCTL);
2971 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2972 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2974 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2975 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2978 * enable stripping of CRC. It's unlikely this will break BMC
2979 * redirection as it did with e1000. Newer features require
2980 * that the HW strips the CRC.
2982 rctl |= E1000_RCTL_SECRC;
2984 /* disable store bad packets and clear size bits. */
2985 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2987 /* enable LPE to prevent packets larger than max_frame_size */
2988 rctl |= E1000_RCTL_LPE;
2990 /* disable queue 0 to prevent tail write w/o re-config */
2991 wr32(E1000_RXDCTL(0), 0);
2993 /* Attention!!! For SR-IOV PF driver operations you must enable
2994 * queue drop for all VF and PF queues to prevent head of line blocking
2995 * if an un-trusted VF does not provide descriptors to hardware.
2997 if (adapter->vfs_allocated_count) {
2998 /* set all queue drop enable bits */
2999 wr32(E1000_QDE, ALL_QUEUES);
3002 wr32(E1000_RCTL, rctl);
3005 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3006 int vfn)
3008 struct e1000_hw *hw = &adapter->hw;
3009 u32 vmolr;
3011 /* if it isn't the PF check to see if VFs are enabled and
3012 * increase the size to support vlan tags */
3013 if (vfn < adapter->vfs_allocated_count &&
3014 adapter->vf_data[vfn].vlans_enabled)
3015 size += VLAN_TAG_SIZE;
3017 vmolr = rd32(E1000_VMOLR(vfn));
3018 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3019 vmolr |= size | E1000_VMOLR_LPE;
3020 wr32(E1000_VMOLR(vfn), vmolr);
3022 return 0;
3026 * igb_rlpml_set - set maximum receive packet size
3027 * @adapter: board private structure
3029 * Configure maximum receivable packet size.
3031 static void igb_rlpml_set(struct igb_adapter *adapter)
3033 u32 max_frame_size = adapter->max_frame_size;
3034 struct e1000_hw *hw = &adapter->hw;
3035 u16 pf_id = adapter->vfs_allocated_count;
3037 if (pf_id) {
3038 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3040 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3041 * to our max jumbo frame size, in case we need to enable
3042 * jumbo frames on one of the rings later.
3043 * This will not pass over-length frames into the default
3044 * queue because it's gated by the VMOLR.RLPML.
3046 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3049 wr32(E1000_RLPML, max_frame_size);
3052 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3053 int vfn, bool aupe)
3055 struct e1000_hw *hw = &adapter->hw;
3056 u32 vmolr;
3059 * This register exists only on 82576 and newer so if we are older then
3060 * we should exit and do nothing
3062 if (hw->mac.type < e1000_82576)
3063 return;
3065 vmolr = rd32(E1000_VMOLR(vfn));
3066 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3067 if (aupe)
3068 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3069 else
3070 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3072 /* clear all bits that might not be set */
3073 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3075 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3076 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3078 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3079 * multicast packets
3081 if (vfn <= adapter->vfs_allocated_count)
3082 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3084 wr32(E1000_VMOLR(vfn), vmolr);
3088 * igb_configure_rx_ring - Configure a receive ring after Reset
3089 * @adapter: board private structure
3090 * @ring: receive ring to be configured
3092 * Configure the Rx unit of the MAC after a reset.
3094 void igb_configure_rx_ring(struct igb_adapter *adapter,
3095 struct igb_ring *ring)
3097 struct e1000_hw *hw = &adapter->hw;
3098 u64 rdba = ring->dma;
3099 int reg_idx = ring->reg_idx;
3100 u32 srrctl = 0, rxdctl = 0;
3102 /* disable the queue */
3103 wr32(E1000_RXDCTL(reg_idx), 0);
3105 /* Set DMA base address registers */
3106 wr32(E1000_RDBAL(reg_idx),
3107 rdba & 0x00000000ffffffffULL);
3108 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3109 wr32(E1000_RDLEN(reg_idx),
3110 ring->count * sizeof(union e1000_adv_rx_desc));
3112 /* initialize head and tail */
3113 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3114 wr32(E1000_RDH(reg_idx), 0);
3115 writel(0, ring->tail);
3117 /* set descriptor configuration */
3118 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3119 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3120 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3121 #else
3122 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3123 #endif
3124 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3125 if (hw->mac.type >= e1000_82580)
3126 srrctl |= E1000_SRRCTL_TIMESTAMP;
3127 /* Only set Drop Enable if we are supporting multiple queues */
3128 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3129 srrctl |= E1000_SRRCTL_DROP_EN;
3131 wr32(E1000_SRRCTL(reg_idx), srrctl);
3133 /* set filtering for VMDQ pools */
3134 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3136 rxdctl |= IGB_RX_PTHRESH;
3137 rxdctl |= IGB_RX_HTHRESH << 8;
3138 rxdctl |= IGB_RX_WTHRESH << 16;
3140 /* enable receive descriptor fetching */
3141 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3142 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3146 * igb_configure_rx - Configure receive Unit after Reset
3147 * @adapter: board private structure
3149 * Configure the Rx unit of the MAC after a reset.
3151 static void igb_configure_rx(struct igb_adapter *adapter)
3153 int i;
3155 /* set UTA to appropriate mode */
3156 igb_set_uta(adapter);
3158 /* set the correct pool for the PF default MAC address in entry 0 */
3159 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3160 adapter->vfs_allocated_count);
3162 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3163 * the Base and Length of the Rx Descriptor Ring */
3164 for (i = 0; i < adapter->num_rx_queues; i++)
3165 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3169 * igb_free_tx_resources - Free Tx Resources per Queue
3170 * @tx_ring: Tx descriptor ring for a specific queue
3172 * Free all transmit software resources
3174 void igb_free_tx_resources(struct igb_ring *tx_ring)
3176 igb_clean_tx_ring(tx_ring);
3178 vfree(tx_ring->tx_buffer_info);
3179 tx_ring->tx_buffer_info = NULL;
3181 /* if not set, then don't free */
3182 if (!tx_ring->desc)
3183 return;
3185 dma_free_coherent(tx_ring->dev, tx_ring->size,
3186 tx_ring->desc, tx_ring->dma);
3188 tx_ring->desc = NULL;
3192 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3193 * @adapter: board private structure
3195 * Free all transmit software resources
3197 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3199 int i;
3201 for (i = 0; i < adapter->num_tx_queues; i++)
3202 igb_free_tx_resources(adapter->tx_ring[i]);
3205 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3206 struct igb_tx_buffer *tx_buffer)
3208 if (tx_buffer->skb) {
3209 dev_kfree_skb_any(tx_buffer->skb);
3210 if (tx_buffer->dma)
3211 dma_unmap_single(ring->dev,
3212 tx_buffer->dma,
3213 tx_buffer->length,
3214 DMA_TO_DEVICE);
3215 } else if (tx_buffer->dma) {
3216 dma_unmap_page(ring->dev,
3217 tx_buffer->dma,
3218 tx_buffer->length,
3219 DMA_TO_DEVICE);
3221 tx_buffer->next_to_watch = NULL;
3222 tx_buffer->skb = NULL;
3223 tx_buffer->dma = 0;
3224 /* buffer_info must be completely set up in the transmit path */
3228 * igb_clean_tx_ring - Free Tx Buffers
3229 * @tx_ring: ring to be cleaned
3231 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3233 struct igb_tx_buffer *buffer_info;
3234 unsigned long size;
3235 u16 i;
3237 if (!tx_ring->tx_buffer_info)
3238 return;
3239 /* Free all the Tx ring sk_buffs */
3241 for (i = 0; i < tx_ring->count; i++) {
3242 buffer_info = &tx_ring->tx_buffer_info[i];
3243 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3245 netdev_tx_reset_queue(txring_txq(tx_ring));
3247 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3248 memset(tx_ring->tx_buffer_info, 0, size);
3250 /* Zero out the descriptor ring */
3251 memset(tx_ring->desc, 0, tx_ring->size);
3253 tx_ring->next_to_use = 0;
3254 tx_ring->next_to_clean = 0;
3258 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3259 * @adapter: board private structure
3261 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3263 int i;
3265 for (i = 0; i < adapter->num_tx_queues; i++)
3266 igb_clean_tx_ring(adapter->tx_ring[i]);
3270 * igb_free_rx_resources - Free Rx Resources
3271 * @rx_ring: ring to clean the resources from
3273 * Free all receive software resources
3275 void igb_free_rx_resources(struct igb_ring *rx_ring)
3277 igb_clean_rx_ring(rx_ring);
3279 vfree(rx_ring->rx_buffer_info);
3280 rx_ring->rx_buffer_info = NULL;
3282 /* if not set, then don't free */
3283 if (!rx_ring->desc)
3284 return;
3286 dma_free_coherent(rx_ring->dev, rx_ring->size,
3287 rx_ring->desc, rx_ring->dma);
3289 rx_ring->desc = NULL;
3293 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3294 * @adapter: board private structure
3296 * Free all receive software resources
3298 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3300 int i;
3302 for (i = 0; i < adapter->num_rx_queues; i++)
3303 igb_free_rx_resources(adapter->rx_ring[i]);
3307 * igb_clean_rx_ring - Free Rx Buffers per Queue
3308 * @rx_ring: ring to free buffers from
3310 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3312 unsigned long size;
3313 u16 i;
3315 if (!rx_ring->rx_buffer_info)
3316 return;
3318 /* Free all the Rx ring sk_buffs */
3319 for (i = 0; i < rx_ring->count; i++) {
3320 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3321 if (buffer_info->dma) {
3322 dma_unmap_single(rx_ring->dev,
3323 buffer_info->dma,
3324 IGB_RX_HDR_LEN,
3325 DMA_FROM_DEVICE);
3326 buffer_info->dma = 0;
3329 if (buffer_info->skb) {
3330 dev_kfree_skb(buffer_info->skb);
3331 buffer_info->skb = NULL;
3333 if (buffer_info->page_dma) {
3334 dma_unmap_page(rx_ring->dev,
3335 buffer_info->page_dma,
3336 PAGE_SIZE / 2,
3337 DMA_FROM_DEVICE);
3338 buffer_info->page_dma = 0;
3340 if (buffer_info->page) {
3341 put_page(buffer_info->page);
3342 buffer_info->page = NULL;
3343 buffer_info->page_offset = 0;
3347 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3348 memset(rx_ring->rx_buffer_info, 0, size);
3350 /* Zero out the descriptor ring */
3351 memset(rx_ring->desc, 0, rx_ring->size);
3353 rx_ring->next_to_clean = 0;
3354 rx_ring->next_to_use = 0;
3358 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3359 * @adapter: board private structure
3361 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3363 int i;
3365 for (i = 0; i < adapter->num_rx_queues; i++)
3366 igb_clean_rx_ring(adapter->rx_ring[i]);
3370 * igb_set_mac - Change the Ethernet Address of the NIC
3371 * @netdev: network interface device structure
3372 * @p: pointer to an address structure
3374 * Returns 0 on success, negative on failure
3376 static int igb_set_mac(struct net_device *netdev, void *p)
3378 struct igb_adapter *adapter = netdev_priv(netdev);
3379 struct e1000_hw *hw = &adapter->hw;
3380 struct sockaddr *addr = p;
3382 if (!is_valid_ether_addr(addr->sa_data))
3383 return -EADDRNOTAVAIL;
3385 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3386 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3388 /* set the correct pool for the new PF MAC address in entry 0 */
3389 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3390 adapter->vfs_allocated_count);
3392 return 0;
3396 * igb_write_mc_addr_list - write multicast addresses to MTA
3397 * @netdev: network interface device structure
3399 * Writes multicast address list to the MTA hash table.
3400 * Returns: -ENOMEM on failure
3401 * 0 on no addresses written
3402 * X on writing X addresses to MTA
3404 static int igb_write_mc_addr_list(struct net_device *netdev)
3406 struct igb_adapter *adapter = netdev_priv(netdev);
3407 struct e1000_hw *hw = &adapter->hw;
3408 struct netdev_hw_addr *ha;
3409 u8 *mta_list;
3410 int i;
3412 if (netdev_mc_empty(netdev)) {
3413 /* nothing to program, so clear mc list */
3414 igb_update_mc_addr_list(hw, NULL, 0);
3415 igb_restore_vf_multicasts(adapter);
3416 return 0;
3419 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3420 if (!mta_list)
3421 return -ENOMEM;
3423 /* The shared function expects a packed array of only addresses. */
3424 i = 0;
3425 netdev_for_each_mc_addr(ha, netdev)
3426 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3428 igb_update_mc_addr_list(hw, mta_list, i);
3429 kfree(mta_list);
3431 return netdev_mc_count(netdev);
3435 * igb_write_uc_addr_list - write unicast addresses to RAR table
3436 * @netdev: network interface device structure
3438 * Writes unicast address list to the RAR table.
3439 * Returns: -ENOMEM on failure/insufficient address space
3440 * 0 on no addresses written
3441 * X on writing X addresses to the RAR table
3443 static int igb_write_uc_addr_list(struct net_device *netdev)
3445 struct igb_adapter *adapter = netdev_priv(netdev);
3446 struct e1000_hw *hw = &adapter->hw;
3447 unsigned int vfn = adapter->vfs_allocated_count;
3448 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3449 int count = 0;
3451 /* return ENOMEM indicating insufficient memory for addresses */
3452 if (netdev_uc_count(netdev) > rar_entries)
3453 return -ENOMEM;
3455 if (!netdev_uc_empty(netdev) && rar_entries) {
3456 struct netdev_hw_addr *ha;
3458 netdev_for_each_uc_addr(ha, netdev) {
3459 if (!rar_entries)
3460 break;
3461 igb_rar_set_qsel(adapter, ha->addr,
3462 rar_entries--,
3463 vfn);
3464 count++;
3467 /* write the addresses in reverse order to avoid write combining */
3468 for (; rar_entries > 0 ; rar_entries--) {
3469 wr32(E1000_RAH(rar_entries), 0);
3470 wr32(E1000_RAL(rar_entries), 0);
3472 wrfl();
3474 return count;
3478 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3479 * @netdev: network interface device structure
3481 * The set_rx_mode entry point is called whenever the unicast or multicast
3482 * address lists or the network interface flags are updated. This routine is
3483 * responsible for configuring the hardware for proper unicast, multicast,
3484 * promiscuous mode, and all-multi behavior.
3486 static void igb_set_rx_mode(struct net_device *netdev)
3488 struct igb_adapter *adapter = netdev_priv(netdev);
3489 struct e1000_hw *hw = &adapter->hw;
3490 unsigned int vfn = adapter->vfs_allocated_count;
3491 u32 rctl, vmolr = 0;
3492 int count;
3494 /* Check for Promiscuous and All Multicast modes */
3495 rctl = rd32(E1000_RCTL);
3497 /* clear the effected bits */
3498 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3500 if (netdev->flags & IFF_PROMISC) {
3501 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3502 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3503 } else {
3504 if (netdev->flags & IFF_ALLMULTI) {
3505 rctl |= E1000_RCTL_MPE;
3506 vmolr |= E1000_VMOLR_MPME;
3507 } else {
3509 * Write addresses to the MTA, if the attempt fails
3510 * then we should just turn on promiscuous mode so
3511 * that we can at least receive multicast traffic
3513 count = igb_write_mc_addr_list(netdev);
3514 if (count < 0) {
3515 rctl |= E1000_RCTL_MPE;
3516 vmolr |= E1000_VMOLR_MPME;
3517 } else if (count) {
3518 vmolr |= E1000_VMOLR_ROMPE;
3522 * Write addresses to available RAR registers, if there is not
3523 * sufficient space to store all the addresses then enable
3524 * unicast promiscuous mode
3526 count = igb_write_uc_addr_list(netdev);
3527 if (count < 0) {
3528 rctl |= E1000_RCTL_UPE;
3529 vmolr |= E1000_VMOLR_ROPE;
3531 rctl |= E1000_RCTL_VFE;
3533 wr32(E1000_RCTL, rctl);
3536 * In order to support SR-IOV and eventually VMDq it is necessary to set
3537 * the VMOLR to enable the appropriate modes. Without this workaround
3538 * we will have issues with VLAN tag stripping not being done for frames
3539 * that are only arriving because we are the default pool
3541 if (hw->mac.type < e1000_82576)
3542 return;
3544 vmolr |= rd32(E1000_VMOLR(vfn)) &
3545 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3546 wr32(E1000_VMOLR(vfn), vmolr);
3547 igb_restore_vf_multicasts(adapter);
3550 static void igb_check_wvbr(struct igb_adapter *adapter)
3552 struct e1000_hw *hw = &adapter->hw;
3553 u32 wvbr = 0;
3555 switch (hw->mac.type) {
3556 case e1000_82576:
3557 case e1000_i350:
3558 if (!(wvbr = rd32(E1000_WVBR)))
3559 return;
3560 break;
3561 default:
3562 break;
3565 adapter->wvbr |= wvbr;
3568 #define IGB_STAGGERED_QUEUE_OFFSET 8
3570 static void igb_spoof_check(struct igb_adapter *adapter)
3572 int j;
3574 if (!adapter->wvbr)
3575 return;
3577 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3578 if (adapter->wvbr & (1 << j) ||
3579 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3580 dev_warn(&adapter->pdev->dev,
3581 "Spoof event(s) detected on VF %d\n", j);
3582 adapter->wvbr &=
3583 ~((1 << j) |
3584 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3589 /* Need to wait a few seconds after link up to get diagnostic information from
3590 * the phy */
3591 static void igb_update_phy_info(unsigned long data)
3593 struct igb_adapter *adapter = (struct igb_adapter *) data;
3594 igb_get_phy_info(&adapter->hw);
3598 * igb_has_link - check shared code for link and determine up/down
3599 * @adapter: pointer to driver private info
3601 bool igb_has_link(struct igb_adapter *adapter)
3603 struct e1000_hw *hw = &adapter->hw;
3604 bool link_active = false;
3605 s32 ret_val = 0;
3607 /* get_link_status is set on LSC (link status) interrupt or
3608 * rx sequence error interrupt. get_link_status will stay
3609 * false until the e1000_check_for_link establishes link
3610 * for copper adapters ONLY
3612 switch (hw->phy.media_type) {
3613 case e1000_media_type_copper:
3614 if (hw->mac.get_link_status) {
3615 ret_val = hw->mac.ops.check_for_link(hw);
3616 link_active = !hw->mac.get_link_status;
3617 } else {
3618 link_active = true;
3620 break;
3621 case e1000_media_type_internal_serdes:
3622 ret_val = hw->mac.ops.check_for_link(hw);
3623 link_active = hw->mac.serdes_has_link;
3624 break;
3625 default:
3626 case e1000_media_type_unknown:
3627 break;
3630 return link_active;
3633 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3635 bool ret = false;
3636 u32 ctrl_ext, thstat;
3638 /* check for thermal sensor event on i350, copper only */
3639 if (hw->mac.type == e1000_i350) {
3640 thstat = rd32(E1000_THSTAT);
3641 ctrl_ext = rd32(E1000_CTRL_EXT);
3643 if ((hw->phy.media_type == e1000_media_type_copper) &&
3644 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3645 ret = !!(thstat & event);
3649 return ret;
3653 * igb_watchdog - Timer Call-back
3654 * @data: pointer to adapter cast into an unsigned long
3656 static void igb_watchdog(unsigned long data)
3658 struct igb_adapter *adapter = (struct igb_adapter *)data;
3659 /* Do the rest outside of interrupt context */
3660 schedule_work(&adapter->watchdog_task);
3663 static void igb_watchdog_task(struct work_struct *work)
3665 struct igb_adapter *adapter = container_of(work,
3666 struct igb_adapter,
3667 watchdog_task);
3668 struct e1000_hw *hw = &adapter->hw;
3669 struct net_device *netdev = adapter->netdev;
3670 u32 link;
3671 int i;
3673 link = igb_has_link(adapter);
3674 if (link) {
3675 /* Cancel scheduled suspend requests. */
3676 pm_runtime_resume(netdev->dev.parent);
3678 if (!netif_carrier_ok(netdev)) {
3679 u32 ctrl;
3680 hw->mac.ops.get_speed_and_duplex(hw,
3681 &adapter->link_speed,
3682 &adapter->link_duplex);
3684 ctrl = rd32(E1000_CTRL);
3685 /* Links status message must follow this format */
3686 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3687 "Duplex, Flow Control: %s\n",
3688 netdev->name,
3689 adapter->link_speed,
3690 adapter->link_duplex == FULL_DUPLEX ?
3691 "Full" : "Half",
3692 (ctrl & E1000_CTRL_TFCE) &&
3693 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3694 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3695 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3697 /* check for thermal sensor event */
3698 if (igb_thermal_sensor_event(hw,
3699 E1000_THSTAT_LINK_THROTTLE)) {
3700 netdev_info(netdev, "The network adapter link "
3701 "speed was downshifted because it "
3702 "overheated\n");
3705 /* adjust timeout factor according to speed/duplex */
3706 adapter->tx_timeout_factor = 1;
3707 switch (adapter->link_speed) {
3708 case SPEED_10:
3709 adapter->tx_timeout_factor = 14;
3710 break;
3711 case SPEED_100:
3712 /* maybe add some timeout factor ? */
3713 break;
3716 netif_carrier_on(netdev);
3718 igb_ping_all_vfs(adapter);
3719 igb_check_vf_rate_limit(adapter);
3721 /* link state has changed, schedule phy info update */
3722 if (!test_bit(__IGB_DOWN, &adapter->state))
3723 mod_timer(&adapter->phy_info_timer,
3724 round_jiffies(jiffies + 2 * HZ));
3726 } else {
3727 if (netif_carrier_ok(netdev)) {
3728 adapter->link_speed = 0;
3729 adapter->link_duplex = 0;
3731 /* check for thermal sensor event */
3732 if (igb_thermal_sensor_event(hw,
3733 E1000_THSTAT_PWR_DOWN)) {
3734 netdev_err(netdev, "The network adapter was "
3735 "stopped because it overheated\n");
3738 /* Links status message must follow this format */
3739 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3740 netdev->name);
3741 netif_carrier_off(netdev);
3743 igb_ping_all_vfs(adapter);
3745 /* link state has changed, schedule phy info update */
3746 if (!test_bit(__IGB_DOWN, &adapter->state))
3747 mod_timer(&adapter->phy_info_timer,
3748 round_jiffies(jiffies + 2 * HZ));
3750 pm_schedule_suspend(netdev->dev.parent,
3751 MSEC_PER_SEC * 5);
3755 spin_lock(&adapter->stats64_lock);
3756 igb_update_stats(adapter, &adapter->stats64);
3757 spin_unlock(&adapter->stats64_lock);
3759 for (i = 0; i < adapter->num_tx_queues; i++) {
3760 struct igb_ring *tx_ring = adapter->tx_ring[i];
3761 if (!netif_carrier_ok(netdev)) {
3762 /* We've lost link, so the controller stops DMA,
3763 * but we've got queued Tx work that's never going
3764 * to get done, so reset controller to flush Tx.
3765 * (Do the reset outside of interrupt context). */
3766 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3767 adapter->tx_timeout_count++;
3768 schedule_work(&adapter->reset_task);
3769 /* return immediately since reset is imminent */
3770 return;
3774 /* Force detection of hung controller every watchdog period */
3775 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3778 /* Cause software interrupt to ensure rx ring is cleaned */
3779 if (adapter->msix_entries) {
3780 u32 eics = 0;
3781 for (i = 0; i < adapter->num_q_vectors; i++)
3782 eics |= adapter->q_vector[i]->eims_value;
3783 wr32(E1000_EICS, eics);
3784 } else {
3785 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3788 igb_spoof_check(adapter);
3790 /* Reset the timer */
3791 if (!test_bit(__IGB_DOWN, &adapter->state))
3792 mod_timer(&adapter->watchdog_timer,
3793 round_jiffies(jiffies + 2 * HZ));
/*
 * Latency classes used by the dynamic ITR code.  igb_set_itr() maps
 * lowest/low/bulk to the 70K/20K/4K interrupt rates respectively.
 */
enum latency_range {
	lowest_latency	= 0,
	low_latency	= 1,
	bulk_latency	= 2,
	latency_invalid	= 255
};
3804 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3806 * Stores a new ITR value based on strictly on packet size. This
3807 * algorithm is less sophisticated than that used in igb_update_itr,
3808 * due to the difficulty of synchronizing statistics across multiple
3809 * receive rings. The divisors and thresholds used by this function
3810 * were determined based on theoretical maximum wire speed and testing
3811 * data, in order to minimize response time while increasing bulk
3812 * throughput.
3813 * This functionality is controlled by the InterruptThrottleRate module
3814 * parameter (see igb_param.c)
3815 * NOTE: This function is called only when operating in a multiqueue
3816 * receive environment.
3817 * @q_vector: pointer to q_vector
3819 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3821 int new_val = q_vector->itr_val;
3822 int avg_wire_size = 0;
3823 struct igb_adapter *adapter = q_vector->adapter;
3824 unsigned int packets;
3826 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3827 * ints/sec - ITR timer value of 120 ticks.
3829 if (adapter->link_speed != SPEED_1000) {
3830 new_val = IGB_4K_ITR;
3831 goto set_itr_val;
3834 packets = q_vector->rx.total_packets;
3835 if (packets)
3836 avg_wire_size = q_vector->rx.total_bytes / packets;
3838 packets = q_vector->tx.total_packets;
3839 if (packets)
3840 avg_wire_size = max_t(u32, avg_wire_size,
3841 q_vector->tx.total_bytes / packets);
3843 /* if avg_wire_size isn't set no work was done */
3844 if (!avg_wire_size)
3845 goto clear_counts;
3847 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3848 avg_wire_size += 24;
3850 /* Don't starve jumbo frames */
3851 avg_wire_size = min(avg_wire_size, 3000);
3853 /* Give a little boost to mid-size frames */
3854 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3855 new_val = avg_wire_size / 3;
3856 else
3857 new_val = avg_wire_size / 2;
3859 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3860 if (new_val < IGB_20K_ITR &&
3861 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3862 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3863 new_val = IGB_20K_ITR;
3865 set_itr_val:
3866 if (new_val != q_vector->itr_val) {
3867 q_vector->itr_val = new_val;
3868 q_vector->set_itr = 1;
3870 clear_counts:
3871 q_vector->rx.total_bytes = 0;
3872 q_vector->rx.total_packets = 0;
3873 q_vector->tx.total_bytes = 0;
3874 q_vector->tx.total_packets = 0;
3878 * igb_update_itr - update the dynamic ITR value based on statistics
3879 * Stores a new ITR value based on packets and byte
3880 * counts during the last interrupt. The advantage of per interrupt
3881 * computation is faster updates and more accurate ITR for the current
3882 * traffic pattern. Constants in this function were computed
3883 * based on theoretical maximum wire speed and thresholds were set based
3884 * on testing data as well as attempting to minimize response time
3885 * while increasing bulk throughput.
3886 * this functionality is controlled by the InterruptThrottleRate module
3887 * parameter (see igb_param.c)
3888 * NOTE: These calculations are only valid when operating in a single-
3889 * queue environment.
3890 * @q_vector: pointer to q_vector
3891 * @ring_container: ring info to update the itr for
3893 static void igb_update_itr(struct igb_q_vector *q_vector,
3894 struct igb_ring_container *ring_container)
3896 unsigned int packets = ring_container->total_packets;
3897 unsigned int bytes = ring_container->total_bytes;
3898 u8 itrval = ring_container->itr;
3900 /* no packets, exit with status unchanged */
3901 if (packets == 0)
3902 return;
3904 switch (itrval) {
3905 case lowest_latency:
3906 /* handle TSO and jumbo frames */
3907 if (bytes/packets > 8000)
3908 itrval = bulk_latency;
3909 else if ((packets < 5) && (bytes > 512))
3910 itrval = low_latency;
3911 break;
3912 case low_latency: /* 50 usec aka 20000 ints/s */
3913 if (bytes > 10000) {
3914 /* this if handles the TSO accounting */
3915 if (bytes/packets > 8000) {
3916 itrval = bulk_latency;
3917 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3918 itrval = bulk_latency;
3919 } else if ((packets > 35)) {
3920 itrval = lowest_latency;
3922 } else if (bytes/packets > 2000) {
3923 itrval = bulk_latency;
3924 } else if (packets <= 2 && bytes < 512) {
3925 itrval = lowest_latency;
3927 break;
3928 case bulk_latency: /* 250 usec aka 4000 ints/s */
3929 if (bytes > 25000) {
3930 if (packets > 35)
3931 itrval = low_latency;
3932 } else if (bytes < 1500) {
3933 itrval = low_latency;
3935 break;
3938 /* clear work counters since we have the values we need */
3939 ring_container->total_bytes = 0;
3940 ring_container->total_packets = 0;
3942 /* write updated itr to ring container */
3943 ring_container->itr = itrval;
3946 static void igb_set_itr(struct igb_q_vector *q_vector)
3948 struct igb_adapter *adapter = q_vector->adapter;
3949 u32 new_itr = q_vector->itr_val;
3950 u8 current_itr = 0;
3952 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3953 if (adapter->link_speed != SPEED_1000) {
3954 current_itr = 0;
3955 new_itr = IGB_4K_ITR;
3956 goto set_itr_now;
3959 igb_update_itr(q_vector, &q_vector->tx);
3960 igb_update_itr(q_vector, &q_vector->rx);
3962 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3964 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3965 if (current_itr == lowest_latency &&
3966 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3967 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3968 current_itr = low_latency;
3970 switch (current_itr) {
3971 /* counts and packets in update_itr are dependent on these numbers */
3972 case lowest_latency:
3973 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
3974 break;
3975 case low_latency:
3976 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
3977 break;
3978 case bulk_latency:
3979 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
3980 break;
3981 default:
3982 break;
3985 set_itr_now:
3986 if (new_itr != q_vector->itr_val) {
3987 /* this attempts to bias the interrupt rate towards Bulk
3988 * by adding intermediate steps when interrupt rate is
3989 * increasing */
3990 new_itr = new_itr > q_vector->itr_val ?
3991 max((new_itr * q_vector->itr_val) /
3992 (new_itr + (q_vector->itr_val >> 2)),
3993 new_itr) :
3994 new_itr;
3995 /* Don't write the value here; it resets the adapter's
3996 * internal timer, and causes us to delay far longer than
3997 * we should between interrupts. Instead, we write the ITR
3998 * value at the beginning of the next interrupt so the timing
3999 * ends up being correct.
4001 q_vector->itr_val = new_itr;
4002 q_vector->set_itr = 1;
4006 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4007 u32 type_tucmd, u32 mss_l4len_idx)
4009 struct e1000_adv_tx_context_desc *context_desc;
4010 u16 i = tx_ring->next_to_use;
4012 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4014 i++;
4015 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4017 /* set bits to identify this as an advanced context descriptor */
4018 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4020 /* For 82575, context index must be unique per ring. */
4021 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4022 mss_l4len_idx |= tx_ring->reg_idx << 4;
4024 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4025 context_desc->seqnum_seed = 0;
4026 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4027 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4030 static int igb_tso(struct igb_ring *tx_ring,
4031 struct igb_tx_buffer *first,
4032 u8 *hdr_len)
4034 struct sk_buff *skb = first->skb;
4035 u32 vlan_macip_lens, type_tucmd;
4036 u32 mss_l4len_idx, l4len;
4038 if (!skb_is_gso(skb))
4039 return 0;
4041 if (skb_header_cloned(skb)) {
4042 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4043 if (err)
4044 return err;
4047 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4048 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4050 if (first->protocol == __constant_htons(ETH_P_IP)) {
4051 struct iphdr *iph = ip_hdr(skb);
4052 iph->tot_len = 0;
4053 iph->check = 0;
4054 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4055 iph->daddr, 0,
4056 IPPROTO_TCP,
4058 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4059 first->tx_flags |= IGB_TX_FLAGS_TSO |
4060 IGB_TX_FLAGS_CSUM |
4061 IGB_TX_FLAGS_IPV4;
4062 } else if (skb_is_gso_v6(skb)) {
4063 ipv6_hdr(skb)->payload_len = 0;
4064 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4065 &ipv6_hdr(skb)->daddr,
4066 0, IPPROTO_TCP, 0);
4067 first->tx_flags |= IGB_TX_FLAGS_TSO |
4068 IGB_TX_FLAGS_CSUM;
4071 /* compute header lengths */
4072 l4len = tcp_hdrlen(skb);
4073 *hdr_len = skb_transport_offset(skb) + l4len;
4075 /* update gso size and bytecount with header size */
4076 first->gso_segs = skb_shinfo(skb)->gso_segs;
4077 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4079 /* MSS L4LEN IDX */
4080 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4081 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4083 /* VLAN MACLEN IPLEN */
4084 vlan_macip_lens = skb_network_header_len(skb);
4085 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4086 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4088 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4090 return 1;
4093 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4095 struct sk_buff *skb = first->skb;
4096 u32 vlan_macip_lens = 0;
4097 u32 mss_l4len_idx = 0;
4098 u32 type_tucmd = 0;
4100 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4101 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4102 return;
4103 } else {
4104 u8 l4_hdr = 0;
4105 switch (first->protocol) {
4106 case __constant_htons(ETH_P_IP):
4107 vlan_macip_lens |= skb_network_header_len(skb);
4108 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4109 l4_hdr = ip_hdr(skb)->protocol;
4110 break;
4111 case __constant_htons(ETH_P_IPV6):
4112 vlan_macip_lens |= skb_network_header_len(skb);
4113 l4_hdr = ipv6_hdr(skb)->nexthdr;
4114 break;
4115 default:
4116 if (unlikely(net_ratelimit())) {
4117 dev_warn(tx_ring->dev,
4118 "partial checksum but proto=%x!\n",
4119 first->protocol);
4121 break;
4124 switch (l4_hdr) {
4125 case IPPROTO_TCP:
4126 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4127 mss_l4len_idx = tcp_hdrlen(skb) <<
4128 E1000_ADVTXD_L4LEN_SHIFT;
4129 break;
4130 case IPPROTO_SCTP:
4131 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4132 mss_l4len_idx = sizeof(struct sctphdr) <<
4133 E1000_ADVTXD_L4LEN_SHIFT;
4134 break;
4135 case IPPROTO_UDP:
4136 mss_l4len_idx = sizeof(struct udphdr) <<
4137 E1000_ADVTXD_L4LEN_SHIFT;
4138 break;
4139 default:
4140 if (unlikely(net_ratelimit())) {
4141 dev_warn(tx_ring->dev,
4142 "partial checksum but l4 proto=%x!\n",
4143 l4_hdr);
4145 break;
4148 /* update TX checksum flag */
4149 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4152 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4153 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4155 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4158 static __le32 igb_tx_cmd_type(u32 tx_flags)
4160 /* set type for advanced descriptor with frame checksum insertion */
4161 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4162 E1000_ADVTXD_DCMD_IFCS |
4163 E1000_ADVTXD_DCMD_DEXT);
4165 /* set HW vlan bit if vlan is present */
4166 if (tx_flags & IGB_TX_FLAGS_VLAN)
4167 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4169 /* set timestamp bit if present */
4170 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4171 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4173 /* set segmentation bits for TSO */
4174 if (tx_flags & IGB_TX_FLAGS_TSO)
4175 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4177 return cmd_type;
4180 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4181 union e1000_adv_tx_desc *tx_desc,
4182 u32 tx_flags, unsigned int paylen)
4184 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4186 /* 82575 requires a unique index per ring if any offload is enabled */
4187 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4188 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4189 olinfo_status |= tx_ring->reg_idx << 4;
4191 /* insert L4 checksum */
4192 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4193 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4195 /* insert IPv4 checksum */
4196 if (tx_flags & IGB_TX_FLAGS_IPV4)
4197 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4200 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
/*
 * The largest size we can write to the descriptor is 65535.  In order to
 * maintain a power of two alignment we have to limit ourselves to 32K.
 */
#define IGB_MAX_TXD_PWR		15
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
4210 static void igb_tx_map(struct igb_ring *tx_ring,
4211 struct igb_tx_buffer *first,
4212 const u8 hdr_len)
4214 struct sk_buff *skb = first->skb;
4215 struct igb_tx_buffer *tx_buffer_info;
4216 union e1000_adv_tx_desc *tx_desc;
4217 dma_addr_t dma;
4218 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4219 unsigned int data_len = skb->data_len;
4220 unsigned int size = skb_headlen(skb);
4221 unsigned int paylen = skb->len - hdr_len;
4222 __le32 cmd_type;
4223 u32 tx_flags = first->tx_flags;
4224 u16 i = tx_ring->next_to_use;
4226 tx_desc = IGB_TX_DESC(tx_ring, i);
4228 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4229 cmd_type = igb_tx_cmd_type(tx_flags);
4231 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4232 if (dma_mapping_error(tx_ring->dev, dma))
4233 goto dma_error;
4235 /* record length, and DMA address */
4236 first->length = size;
4237 first->dma = dma;
4238 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4240 for (;;) {
4241 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4242 tx_desc->read.cmd_type_len =
4243 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4245 i++;
4246 tx_desc++;
4247 if (i == tx_ring->count) {
4248 tx_desc = IGB_TX_DESC(tx_ring, 0);
4249 i = 0;
4252 dma += IGB_MAX_DATA_PER_TXD;
4253 size -= IGB_MAX_DATA_PER_TXD;
4255 tx_desc->read.olinfo_status = 0;
4256 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4259 if (likely(!data_len))
4260 break;
4262 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4264 i++;
4265 tx_desc++;
4266 if (i == tx_ring->count) {
4267 tx_desc = IGB_TX_DESC(tx_ring, 0);
4268 i = 0;
4271 size = skb_frag_size(frag);
4272 data_len -= size;
4274 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4275 size, DMA_TO_DEVICE);
4276 if (dma_mapping_error(tx_ring->dev, dma))
4277 goto dma_error;
4279 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4280 tx_buffer_info->length = size;
4281 tx_buffer_info->dma = dma;
4283 tx_desc->read.olinfo_status = 0;
4284 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4286 frag++;
4289 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4291 /* write last descriptor with RS and EOP bits */
4292 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4293 tx_desc->read.cmd_type_len = cmd_type;
4295 /* set the timestamp */
4296 first->time_stamp = jiffies;
4299 * Force memory writes to complete before letting h/w know there
4300 * are new descriptors to fetch. (Only applicable for weak-ordered
4301 * memory model archs, such as IA-64).
4303 * We also need this memory barrier to make certain all of the
4304 * status bits have been updated before next_to_watch is written.
4306 wmb();
4308 /* set next_to_watch value indicating a packet is present */
4309 first->next_to_watch = tx_desc;
4311 i++;
4312 if (i == tx_ring->count)
4313 i = 0;
4315 tx_ring->next_to_use = i;
4317 writel(i, tx_ring->tail);
4319 /* we need this if more than one processor can write to our tail
4320 * at a time, it syncronizes IO on IA64/Altix systems */
4321 mmiowb();
4323 return;
4325 dma_error:
4326 dev_err(tx_ring->dev, "TX DMA map failed\n");
4328 /* clear dma mappings for failed tx_buffer_info map */
4329 for (;;) {
4330 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4331 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4332 if (tx_buffer_info == first)
4333 break;
4334 if (i == 0)
4335 i = tx_ring->count;
4336 i--;
4339 tx_ring->next_to_use = i;
4342 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4344 struct net_device *netdev = tx_ring->netdev;
4346 netif_stop_subqueue(netdev, tx_ring->queue_index);
4348 /* Herbert's original patch had:
4349 * smp_mb__after_netif_stop_queue();
4350 * but since that doesn't exist yet, just open code it. */
4351 smp_mb();
4353 /* We need to check again in a case another CPU has just
4354 * made room available. */
4355 if (igb_desc_unused(tx_ring) < size)
4356 return -EBUSY;
4358 /* A reprieve! */
4359 netif_wake_subqueue(netdev, tx_ring->queue_index);
4361 u64_stats_update_begin(&tx_ring->tx_syncp2);
4362 tx_ring->tx_stats.restart_queue2++;
4363 u64_stats_update_end(&tx_ring->tx_syncp2);
4365 return 0;
4368 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4370 if (igb_desc_unused(tx_ring) >= size)
4371 return 0;
4372 return __igb_maybe_stop_tx(tx_ring, size);
4375 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4376 struct igb_ring *tx_ring)
4378 struct igb_tx_buffer *first;
4379 int tso;
4380 u32 tx_flags = 0;
4381 __be16 protocol = vlan_get_protocol(skb);
4382 u8 hdr_len = 0;
4384 /* need: 1 descriptor per page,
4385 * + 2 desc gap to keep tail from touching head,
4386 * + 1 desc for skb->data,
4387 * + 1 desc for context descriptor,
4388 * otherwise try next time */
4389 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4390 /* this is a hard error */
4391 return NETDEV_TX_BUSY;
4394 /* record the location of the first descriptor for this packet */
4395 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4396 first->skb = skb;
4397 first->bytecount = skb->len;
4398 first->gso_segs = 1;
4400 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4401 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4402 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4405 if (vlan_tx_tag_present(skb)) {
4406 tx_flags |= IGB_TX_FLAGS_VLAN;
4407 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4410 /* record initial flags and protocol */
4411 first->tx_flags = tx_flags;
4412 first->protocol = protocol;
4414 tso = igb_tso(tx_ring, first, &hdr_len);
4415 if (tso < 0)
4416 goto out_drop;
4417 else if (!tso)
4418 igb_tx_csum(tx_ring, first);
4420 igb_tx_map(tx_ring, first, hdr_len);
4422 /* Make sure there is space in the ring for the next send. */
4423 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4425 return NETDEV_TX_OK;
4427 out_drop:
4428 igb_unmap_and_free_tx_resource(tx_ring, first);
4430 return NETDEV_TX_OK;
4433 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4434 struct sk_buff *skb)
4436 unsigned int r_idx = skb->queue_mapping;
4438 if (r_idx >= adapter->num_tx_queues)
4439 r_idx = r_idx % adapter->num_tx_queues;
4441 return adapter->tx_ring[r_idx];
4444 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4445 struct net_device *netdev)
4447 struct igb_adapter *adapter = netdev_priv(netdev);
4449 if (test_bit(__IGB_DOWN, &adapter->state)) {
4450 dev_kfree_skb_any(skb);
4451 return NETDEV_TX_OK;
4454 if (skb->len <= 0) {
4455 dev_kfree_skb_any(skb);
4456 return NETDEV_TX_OK;
4460 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4461 * in order to meet this minimum size requirement.
4463 if (skb->len < 17) {
4464 if (skb_padto(skb, 17))
4465 return NETDEV_TX_OK;
4466 skb->len = 17;
4469 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4473 * igb_tx_timeout - Respond to a Tx Hang
4474 * @netdev: network interface device structure
4476 static void igb_tx_timeout(struct net_device *netdev)
4478 struct igb_adapter *adapter = netdev_priv(netdev);
4479 struct e1000_hw *hw = &adapter->hw;
4481 /* Do the reset outside of interrupt context */
4482 adapter->tx_timeout_count++;
4484 if (hw->mac.type >= e1000_82580)
4485 hw->dev_spec._82575.global_device_reset = true;
4487 schedule_work(&adapter->reset_task);
4488 wr32(E1000_EICS,
4489 (adapter->eims_enable_mask & ~adapter->eims_other));
4492 static void igb_reset_task(struct work_struct *work)
4494 struct igb_adapter *adapter;
4495 adapter = container_of(work, struct igb_adapter, reset_task);
4497 igb_dump(adapter);
4498 netdev_err(adapter->netdev, "Reset adapter\n");
4499 igb_reinit_locked(adapter);
4503 * igb_get_stats64 - Get System Network Statistics
4504 * @netdev: network interface device structure
4505 * @stats: rtnl_link_stats64 pointer
4508 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4509 struct rtnl_link_stats64 *stats)
4511 struct igb_adapter *adapter = netdev_priv(netdev);
4513 spin_lock(&adapter->stats64_lock);
4514 igb_update_stats(adapter, &adapter->stats64);
4515 memcpy(stats, &adapter->stats64, sizeof(*stats));
4516 spin_unlock(&adapter->stats64_lock);
4518 return stats;
4522 * igb_change_mtu - Change the Maximum Transfer Unit
4523 * @netdev: network interface device structure
4524 * @new_mtu: new value for maximum frame size
4526 * Returns 0 on success, negative on failure
4528 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4530 struct igb_adapter *adapter = netdev_priv(netdev);
4531 struct pci_dev *pdev = adapter->pdev;
4532 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4534 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4535 dev_err(&pdev->dev, "Invalid MTU setting\n");
4536 return -EINVAL;
4539 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4540 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4541 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4542 return -EINVAL;
4545 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4546 msleep(1);
4548 /* igb_down has a dependency on max_frame_size */
4549 adapter->max_frame_size = max_frame;
4551 if (netif_running(netdev))
4552 igb_down(adapter);
4554 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4555 netdev->mtu, new_mtu);
4556 netdev->mtu = new_mtu;
4558 if (netif_running(netdev))
4559 igb_up(adapter);
4560 else
4561 igb_reset(adapter);
4563 clear_bit(__IGB_RESETTING, &adapter->state);
4565 return 0;
4569 * igb_update_stats - Update the board statistics counters
4570 * @adapter: board private structure
4573 void igb_update_stats(struct igb_adapter *adapter,
4574 struct rtnl_link_stats64 *net_stats)
4576 struct e1000_hw *hw = &adapter->hw;
4577 struct pci_dev *pdev = adapter->pdev;
4578 u32 reg, mpc;
4579 u16 phy_tmp;
4580 int i;
4581 u64 bytes, packets;
4582 unsigned int start;
4583 u64 _bytes, _packets;
4585 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4588 * Prevent stats update while adapter is being reset, or if the pci
4589 * connection is down.
4591 if (adapter->link_speed == 0)
4592 return;
4593 if (pci_channel_offline(pdev))
4594 return;
4596 bytes = 0;
4597 packets = 0;
4598 for (i = 0; i < adapter->num_rx_queues; i++) {
4599 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4600 struct igb_ring *ring = adapter->rx_ring[i];
4602 ring->rx_stats.drops += rqdpc_tmp;
4603 net_stats->rx_fifo_errors += rqdpc_tmp;
4605 do {
4606 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4607 _bytes = ring->rx_stats.bytes;
4608 _packets = ring->rx_stats.packets;
4609 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4610 bytes += _bytes;
4611 packets += _packets;
4614 net_stats->rx_bytes = bytes;
4615 net_stats->rx_packets = packets;
4617 bytes = 0;
4618 packets = 0;
4619 for (i = 0; i < adapter->num_tx_queues; i++) {
4620 struct igb_ring *ring = adapter->tx_ring[i];
4621 do {
4622 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4623 _bytes = ring->tx_stats.bytes;
4624 _packets = ring->tx_stats.packets;
4625 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4626 bytes += _bytes;
4627 packets += _packets;
4629 net_stats->tx_bytes = bytes;
4630 net_stats->tx_packets = packets;
4632 /* read stats registers */
4633 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4634 adapter->stats.gprc += rd32(E1000_GPRC);
4635 adapter->stats.gorc += rd32(E1000_GORCL);
4636 rd32(E1000_GORCH); /* clear GORCL */
4637 adapter->stats.bprc += rd32(E1000_BPRC);
4638 adapter->stats.mprc += rd32(E1000_MPRC);
4639 adapter->stats.roc += rd32(E1000_ROC);
4641 adapter->stats.prc64 += rd32(E1000_PRC64);
4642 adapter->stats.prc127 += rd32(E1000_PRC127);
4643 adapter->stats.prc255 += rd32(E1000_PRC255);
4644 adapter->stats.prc511 += rd32(E1000_PRC511);
4645 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4646 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4647 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4648 adapter->stats.sec += rd32(E1000_SEC);
4650 mpc = rd32(E1000_MPC);
4651 adapter->stats.mpc += mpc;
4652 net_stats->rx_fifo_errors += mpc;
4653 adapter->stats.scc += rd32(E1000_SCC);
4654 adapter->stats.ecol += rd32(E1000_ECOL);
4655 adapter->stats.mcc += rd32(E1000_MCC);
4656 adapter->stats.latecol += rd32(E1000_LATECOL);
4657 adapter->stats.dc += rd32(E1000_DC);
4658 adapter->stats.rlec += rd32(E1000_RLEC);
4659 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4660 adapter->stats.xontxc += rd32(E1000_XONTXC);
4661 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4662 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4663 adapter->stats.fcruc += rd32(E1000_FCRUC);
4664 adapter->stats.gptc += rd32(E1000_GPTC);
4665 adapter->stats.gotc += rd32(E1000_GOTCL);
4666 rd32(E1000_GOTCH); /* clear GOTCL */
4667 adapter->stats.rnbc += rd32(E1000_RNBC);
4668 adapter->stats.ruc += rd32(E1000_RUC);
4669 adapter->stats.rfc += rd32(E1000_RFC);
4670 adapter->stats.rjc += rd32(E1000_RJC);
4671 adapter->stats.tor += rd32(E1000_TORH);
4672 adapter->stats.tot += rd32(E1000_TOTH);
4673 adapter->stats.tpr += rd32(E1000_TPR);
4675 adapter->stats.ptc64 += rd32(E1000_PTC64);
4676 adapter->stats.ptc127 += rd32(E1000_PTC127);
4677 adapter->stats.ptc255 += rd32(E1000_PTC255);
4678 adapter->stats.ptc511 += rd32(E1000_PTC511);
4679 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4680 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4682 adapter->stats.mptc += rd32(E1000_MPTC);
4683 adapter->stats.bptc += rd32(E1000_BPTC);
4685 adapter->stats.tpt += rd32(E1000_TPT);
4686 adapter->stats.colc += rd32(E1000_COLC);
4688 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4689 /* read internal phy specific stats */
4690 reg = rd32(E1000_CTRL_EXT);
4691 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4692 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4693 adapter->stats.tncrs += rd32(E1000_TNCRS);
4696 adapter->stats.tsctc += rd32(E1000_TSCTC);
4697 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4699 adapter->stats.iac += rd32(E1000_IAC);
4700 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4701 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4702 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4703 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4704 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4705 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4706 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4707 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4709 /* Fill out the OS statistics structure */
4710 net_stats->multicast = adapter->stats.mprc;
4711 net_stats->collisions = adapter->stats.colc;
4713 /* Rx Errors */
4715 /* RLEC on some newer hardware can be incorrect so build
4716 * our own version based on RUC and ROC */
4717 net_stats->rx_errors = adapter->stats.rxerrc +
4718 adapter->stats.crcerrs + adapter->stats.algnerrc +
4719 adapter->stats.ruc + adapter->stats.roc +
4720 adapter->stats.cexterr;
4721 net_stats->rx_length_errors = adapter->stats.ruc +
4722 adapter->stats.roc;
4723 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4724 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4725 net_stats->rx_missed_errors = adapter->stats.mpc;
4727 /* Tx Errors */
4728 net_stats->tx_errors = adapter->stats.ecol +
4729 adapter->stats.latecol;
4730 net_stats->tx_aborted_errors = adapter->stats.ecol;
4731 net_stats->tx_window_errors = adapter->stats.latecol;
4732 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4734 /* Tx Dropped needs to be maintained elsewhere */
4736 /* Phy Stats */
4737 if (hw->phy.media_type == e1000_media_type_copper) {
4738 if ((adapter->link_speed == SPEED_1000) &&
4739 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4740 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4741 adapter->phy_stats.idle_errors += phy_tmp;
4745 /* Management Stats */
4746 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4747 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4748 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4750 /* OS2BMC Stats */
4751 reg = rd32(E1000_MANC);
4752 if (reg & E1000_MANC_EN_BMC2OS) {
4753 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4754 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4755 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4756 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4760 static irqreturn_t igb_msix_other(int irq, void *data)
4762 struct igb_adapter *adapter = data;
4763 struct e1000_hw *hw = &adapter->hw;
4764 u32 icr = rd32(E1000_ICR);
4765 /* reading ICR causes bit 31 of EICR to be cleared */
4767 if (icr & E1000_ICR_DRSTA)
4768 schedule_work(&adapter->reset_task);
4770 if (icr & E1000_ICR_DOUTSYNC) {
4771 /* HW is reporting DMA is out of sync */
4772 adapter->stats.doosync++;
4773 /* The DMA Out of Sync is also indication of a spoof event
4774 * in IOV mode. Check the Wrong VM Behavior register to
4775 * see if it is really a spoof event. */
4776 igb_check_wvbr(adapter);
4779 /* Check for a mailbox event */
4780 if (icr & E1000_ICR_VMMB)
4781 igb_msg_task(adapter);
4783 if (icr & E1000_ICR_LSC) {
4784 hw->mac.get_link_status = 1;
4785 /* guard against interrupt when we're going down */
4786 if (!test_bit(__IGB_DOWN, &adapter->state))
4787 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4790 wr32(E1000_EIMS, adapter->eims_other);
4792 return IRQ_HANDLED;
4795 static void igb_write_itr(struct igb_q_vector *q_vector)
4797 struct igb_adapter *adapter = q_vector->adapter;
4798 u32 itr_val = q_vector->itr_val & 0x7FFC;
4800 if (!q_vector->set_itr)
4801 return;
4803 if (!itr_val)
4804 itr_val = 0x4;
4806 if (adapter->hw.mac.type == e1000_82575)
4807 itr_val |= itr_val << 16;
4808 else
4809 itr_val |= E1000_EITR_CNT_IGNR;
4811 writel(itr_val, q_vector->itr_register);
4812 q_vector->set_itr = 0;
4815 static irqreturn_t igb_msix_ring(int irq, void *data)
4817 struct igb_q_vector *q_vector = data;
4819 /* Write the ITR value calculated from the previous interrupt. */
4820 igb_write_itr(q_vector);
4822 napi_schedule(&q_vector->napi);
4824 return IRQ_HANDLED;
4827 #ifdef CONFIG_IGB_DCA
4828 static void igb_update_dca(struct igb_q_vector *q_vector)
4830 struct igb_adapter *adapter = q_vector->adapter;
4831 struct e1000_hw *hw = &adapter->hw;
4832 int cpu = get_cpu();
4834 if (q_vector->cpu == cpu)
4835 goto out_no_update;
4837 if (q_vector->tx.ring) {
4838 int q = q_vector->tx.ring->reg_idx;
4839 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4840 if (hw->mac.type == e1000_82575) {
4841 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4842 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4843 } else {
4844 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4845 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4846 E1000_DCA_TXCTRL_CPUID_SHIFT;
4848 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4849 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4851 if (q_vector->rx.ring) {
4852 int q = q_vector->rx.ring->reg_idx;
4853 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4854 if (hw->mac.type == e1000_82575) {
4855 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4856 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4857 } else {
4858 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4859 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4860 E1000_DCA_RXCTRL_CPUID_SHIFT;
4862 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4863 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4864 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4865 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4867 q_vector->cpu = cpu;
4868 out_no_update:
4869 put_cpu();
4872 static void igb_setup_dca(struct igb_adapter *adapter)
4874 struct e1000_hw *hw = &adapter->hw;
4875 int i;
4877 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4878 return;
4880 /* Always use CB2 mode, difference is masked in the CB driver. */
4881 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4883 for (i = 0; i < adapter->num_q_vectors; i++) {
4884 adapter->q_vector[i]->cpu = -1;
4885 igb_update_dca(adapter->q_vector[i]);
4889 static int __igb_notify_dca(struct device *dev, void *data)
4891 struct net_device *netdev = dev_get_drvdata(dev);
4892 struct igb_adapter *adapter = netdev_priv(netdev);
4893 struct pci_dev *pdev = adapter->pdev;
4894 struct e1000_hw *hw = &adapter->hw;
4895 unsigned long event = *(unsigned long *)data;
4897 switch (event) {
4898 case DCA_PROVIDER_ADD:
4899 /* if already enabled, don't do it again */
4900 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4901 break;
4902 if (dca_add_requester(dev) == 0) {
4903 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4904 dev_info(&pdev->dev, "DCA enabled\n");
4905 igb_setup_dca(adapter);
4906 break;
4908 /* Fall Through since DCA is disabled. */
4909 case DCA_PROVIDER_REMOVE:
4910 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4911 /* without this a class_device is left
4912 * hanging around in the sysfs model */
4913 dca_remove_requester(dev);
4914 dev_info(&pdev->dev, "DCA disabled\n");
4915 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4916 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4918 break;
4921 return 0;
4924 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4925 void *p)
4927 int ret_val;
4929 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4930 __igb_notify_dca);
4932 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4934 #endif /* CONFIG_IGB_DCA */
4936 #ifdef CONFIG_PCI_IOV
4937 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4939 unsigned char mac_addr[ETH_ALEN];
4940 struct pci_dev *pdev = adapter->pdev;
4941 struct e1000_hw *hw = &adapter->hw;
4942 struct pci_dev *pvfdev;
4943 unsigned int device_id;
4944 u16 thisvf_devfn;
4946 random_ether_addr(mac_addr);
4947 igb_set_vf_mac(adapter, vf, mac_addr);
4949 switch (adapter->hw.mac.type) {
4950 case e1000_82576:
4951 device_id = IGB_82576_VF_DEV_ID;
4952 /* VF Stride for 82576 is 2 */
4953 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4954 (pdev->devfn & 1);
4955 break;
4956 case e1000_i350:
4957 device_id = IGB_I350_VF_DEV_ID;
4958 /* VF Stride for I350 is 4 */
4959 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
4960 (pdev->devfn & 3);
4961 break;
4962 default:
4963 device_id = 0;
4964 thisvf_devfn = 0;
4965 break;
4968 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
4969 while (pvfdev) {
4970 if (pvfdev->devfn == thisvf_devfn)
4971 break;
4972 pvfdev = pci_get_device(hw->vendor_id,
4973 device_id, pvfdev);
4976 if (pvfdev)
4977 adapter->vf_data[vf].vfdev = pvfdev;
4978 else
4979 dev_err(&pdev->dev,
4980 "Couldn't find pci dev ptr for VF %4.4x\n",
4981 thisvf_devfn);
4982 return pvfdev != NULL;
4985 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
4987 struct e1000_hw *hw = &adapter->hw;
4988 struct pci_dev *pdev = adapter->pdev;
4989 struct pci_dev *pvfdev;
4990 u16 vf_devfn = 0;
4991 u16 vf_stride;
4992 unsigned int device_id;
4993 int vfs_found = 0;
4995 switch (adapter->hw.mac.type) {
4996 case e1000_82576:
4997 device_id = IGB_82576_VF_DEV_ID;
4998 /* VF Stride for 82576 is 2 */
4999 vf_stride = 2;
5000 break;
5001 case e1000_i350:
5002 device_id = IGB_I350_VF_DEV_ID;
5003 /* VF Stride for I350 is 4 */
5004 vf_stride = 4;
5005 break;
5006 default:
5007 device_id = 0;
5008 vf_stride = 0;
5009 break;
5012 vf_devfn = pdev->devfn + 0x80;
5013 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5014 while (pvfdev) {
5015 if (pvfdev->devfn == vf_devfn)
5016 vfs_found++;
5017 vf_devfn += vf_stride;
5018 pvfdev = pci_get_device(hw->vendor_id,
5019 device_id, pvfdev);
5022 return vfs_found;
5025 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5027 int i;
5028 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5029 if (adapter->vf_data[i].vfdev) {
5030 if (adapter->vf_data[i].vfdev->dev_flags &
5031 PCI_DEV_FLAGS_ASSIGNED)
5032 return true;
5035 return false;
5038 #endif
5039 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5041 struct e1000_hw *hw = &adapter->hw;
5042 u32 ping;
5043 int i;
5045 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5046 ping = E1000_PF_CONTROL_MSG;
5047 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5048 ping |= E1000_VT_MSGTYPE_CTS;
5049 igb_write_mbx(hw, &ping, 1, i);
5053 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5055 struct e1000_hw *hw = &adapter->hw;
5056 u32 vmolr = rd32(E1000_VMOLR(vf));
5057 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5059 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5060 IGB_VF_FLAG_MULTI_PROMISC);
5061 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5063 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5064 vmolr |= E1000_VMOLR_MPME;
5065 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5066 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5067 } else {
5069 * if we have hashes and we are clearing a multicast promisc
5070 * flag we need to write the hashes to the MTA as this step
5071 * was previously skipped
5073 if (vf_data->num_vf_mc_hashes > 30) {
5074 vmolr |= E1000_VMOLR_MPME;
5075 } else if (vf_data->num_vf_mc_hashes) {
5076 int j;
5077 vmolr |= E1000_VMOLR_ROMPE;
5078 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5079 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5083 wr32(E1000_VMOLR(vf), vmolr);
5085 /* there are flags left unprocessed, likely not supported */
5086 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5087 return -EINVAL;
5089 return 0;
5093 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5094 u32 *msgbuf, u32 vf)
5096 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5097 u16 *hash_list = (u16 *)&msgbuf[1];
5098 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5099 int i;
5101 /* salt away the number of multicast addresses assigned
5102 * to this VF for later use to restore when the PF multi cast
5103 * list changes
5105 vf_data->num_vf_mc_hashes = n;
5107 /* only up to 30 hash values supported */
5108 if (n > 30)
5109 n = 30;
5111 /* store the hashes for later use */
5112 for (i = 0; i < n; i++)
5113 vf_data->vf_mc_hashes[i] = hash_list[i];
5115 /* Flush and reset the mta with the new values */
5116 igb_set_rx_mode(adapter->netdev);
5118 return 0;
5121 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5123 struct e1000_hw *hw = &adapter->hw;
5124 struct vf_data_storage *vf_data;
5125 int i, j;
5127 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5128 u32 vmolr = rd32(E1000_VMOLR(i));
5129 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5131 vf_data = &adapter->vf_data[i];
5133 if ((vf_data->num_vf_mc_hashes > 30) ||
5134 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5135 vmolr |= E1000_VMOLR_MPME;
5136 } else if (vf_data->num_vf_mc_hashes) {
5137 vmolr |= E1000_VMOLR_ROMPE;
5138 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5139 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5141 wr32(E1000_VMOLR(i), vmolr);
5145 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5147 struct e1000_hw *hw = &adapter->hw;
5148 u32 pool_mask, reg, vid;
5149 int i;
5151 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5153 /* Find the vlan filter for this id */
5154 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5155 reg = rd32(E1000_VLVF(i));
5157 /* remove the vf from the pool */
5158 reg &= ~pool_mask;
5160 /* if pool is empty then remove entry from vfta */
5161 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5162 (reg & E1000_VLVF_VLANID_ENABLE)) {
5163 reg = 0;
5164 vid = reg & E1000_VLVF_VLANID_MASK;
5165 igb_vfta_set(hw, vid, false);
5168 wr32(E1000_VLVF(i), reg);
5171 adapter->vf_data[vf].vlans_enabled = 0;
5174 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5176 struct e1000_hw *hw = &adapter->hw;
5177 u32 reg, i;
5179 /* The vlvf table only exists on 82576 hardware and newer */
5180 if (hw->mac.type < e1000_82576)
5181 return -1;
5183 /* we only need to do this if VMDq is enabled */
5184 if (!adapter->vfs_allocated_count)
5185 return -1;
5187 /* Find the vlan filter for this id */
5188 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5189 reg = rd32(E1000_VLVF(i));
5190 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5191 vid == (reg & E1000_VLVF_VLANID_MASK))
5192 break;
5195 if (add) {
5196 if (i == E1000_VLVF_ARRAY_SIZE) {
5197 /* Did not find a matching VLAN ID entry that was
5198 * enabled. Search for a free filter entry, i.e.
5199 * one without the enable bit set
5201 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5202 reg = rd32(E1000_VLVF(i));
5203 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5204 break;
5207 if (i < E1000_VLVF_ARRAY_SIZE) {
5208 /* Found an enabled/available entry */
5209 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5211 /* if !enabled we need to set this up in vfta */
5212 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5213 /* add VID to filter table */
5214 igb_vfta_set(hw, vid, true);
5215 reg |= E1000_VLVF_VLANID_ENABLE;
5217 reg &= ~E1000_VLVF_VLANID_MASK;
5218 reg |= vid;
5219 wr32(E1000_VLVF(i), reg);
5221 /* do not modify RLPML for PF devices */
5222 if (vf >= adapter->vfs_allocated_count)
5223 return 0;
5225 if (!adapter->vf_data[vf].vlans_enabled) {
5226 u32 size;
5227 reg = rd32(E1000_VMOLR(vf));
5228 size = reg & E1000_VMOLR_RLPML_MASK;
5229 size += 4;
5230 reg &= ~E1000_VMOLR_RLPML_MASK;
5231 reg |= size;
5232 wr32(E1000_VMOLR(vf), reg);
5235 adapter->vf_data[vf].vlans_enabled++;
5237 } else {
5238 if (i < E1000_VLVF_ARRAY_SIZE) {
5239 /* remove vf from the pool */
5240 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5241 /* if pool is empty then remove entry from vfta */
5242 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5243 reg = 0;
5244 igb_vfta_set(hw, vid, false);
5246 wr32(E1000_VLVF(i), reg);
5248 /* do not modify RLPML for PF devices */
5249 if (vf >= adapter->vfs_allocated_count)
5250 return 0;
5252 adapter->vf_data[vf].vlans_enabled--;
5253 if (!adapter->vf_data[vf].vlans_enabled) {
5254 u32 size;
5255 reg = rd32(E1000_VMOLR(vf));
5256 size = reg & E1000_VMOLR_RLPML_MASK;
5257 size -= 4;
5258 reg &= ~E1000_VMOLR_RLPML_MASK;
5259 reg |= size;
5260 wr32(E1000_VMOLR(vf), reg);
5264 return 0;
5267 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5269 struct e1000_hw *hw = &adapter->hw;
5271 if (vid)
5272 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5273 else
5274 wr32(E1000_VMVIR(vf), 0);
5277 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5278 int vf, u16 vlan, u8 qos)
5280 int err = 0;
5281 struct igb_adapter *adapter = netdev_priv(netdev);
5283 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5284 return -EINVAL;
5285 if (vlan || qos) {
5286 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5287 if (err)
5288 goto out;
5289 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5290 igb_set_vmolr(adapter, vf, !vlan);
5291 adapter->vf_data[vf].pf_vlan = vlan;
5292 adapter->vf_data[vf].pf_qos = qos;
5293 dev_info(&adapter->pdev->dev,
5294 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5295 if (test_bit(__IGB_DOWN, &adapter->state)) {
5296 dev_warn(&adapter->pdev->dev,
5297 "The VF VLAN has been set,"
5298 " but the PF device is not up.\n");
5299 dev_warn(&adapter->pdev->dev,
5300 "Bring the PF device up before"
5301 " attempting to use the VF device.\n");
5303 } else {
5304 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5305 false, vf);
5306 igb_set_vmvir(adapter, vlan, vf);
5307 igb_set_vmolr(adapter, vf, true);
5308 adapter->vf_data[vf].pf_vlan = 0;
5309 adapter->vf_data[vf].pf_qos = 0;
5311 out:
5312 return err;
5315 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5317 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5318 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5320 return igb_vlvf_set(adapter, vid, add, vf);
5323 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5325 /* clear flags - except flag that indicates PF has set the MAC */
5326 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5327 adapter->vf_data[vf].last_nack = jiffies;
5329 /* reset offloads to defaults */
5330 igb_set_vmolr(adapter, vf, true);
5332 /* reset vlans for device */
5333 igb_clear_vf_vfta(adapter, vf);
5334 if (adapter->vf_data[vf].pf_vlan)
5335 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5336 adapter->vf_data[vf].pf_vlan,
5337 adapter->vf_data[vf].pf_qos);
5338 else
5339 igb_clear_vf_vfta(adapter, vf);
5341 /* reset multicast table array for vf */
5342 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5344 /* Flush and reset the mta with the new values */
5345 igb_set_rx_mode(adapter->netdev);
5348 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5350 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5352 /* generate a new mac address as we were hotplug removed/added */
5353 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5354 random_ether_addr(vf_mac);
5356 /* process remaining reset events */
5357 igb_vf_reset(adapter, vf);
5360 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5362 struct e1000_hw *hw = &adapter->hw;
5363 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5364 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5365 u32 reg, msgbuf[3];
5366 u8 *addr = (u8 *)(&msgbuf[1]);
5368 /* process all the same items cleared in a function level reset */
5369 igb_vf_reset(adapter, vf);
5371 /* set vf mac address */
5372 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5374 /* enable transmit and receive for vf */
5375 reg = rd32(E1000_VFTE);
5376 wr32(E1000_VFTE, reg | (1 << vf));
5377 reg = rd32(E1000_VFRE);
5378 wr32(E1000_VFRE, reg | (1 << vf));
5380 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5382 /* reply to reset with ack and vf mac address */
5383 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5384 memcpy(addr, vf_mac, 6);
5385 igb_write_mbx(hw, msgbuf, 3, vf);
5388 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5391 * The VF MAC Address is stored in a packed array of bytes
5392 * starting at the second 32 bit word of the msg array
5394 unsigned char *addr = (char *)&msg[1];
5395 int err = -1;
5397 if (is_valid_ether_addr(addr))
5398 err = igb_set_vf_mac(adapter, vf, addr);
5400 return err;
5403 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5405 struct e1000_hw *hw = &adapter->hw;
5406 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5407 u32 msg = E1000_VT_MSGTYPE_NACK;
5409 /* if device isn't clear to send it shouldn't be reading either */
5410 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5411 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5412 igb_write_mbx(hw, &msg, 1, vf);
5413 vf_data->last_nack = jiffies;
5417 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5419 struct pci_dev *pdev = adapter->pdev;
5420 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5421 struct e1000_hw *hw = &adapter->hw;
5422 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5423 s32 retval;
5425 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5427 if (retval) {
5428 /* if receive failed revoke VF CTS stats and restart init */
5429 dev_err(&pdev->dev, "Error receiving message from VF\n");
5430 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5431 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5432 return;
5433 goto out;
5436 /* this is a message we already processed, do nothing */
5437 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5438 return;
5441 * until the vf completes a reset it should not be
5442 * allowed to start any configuration.
5445 if (msgbuf[0] == E1000_VF_RESET) {
5446 igb_vf_reset_msg(adapter, vf);
5447 return;
5450 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5451 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5452 return;
5453 retval = -1;
5454 goto out;
5457 switch ((msgbuf[0] & 0xFFFF)) {
5458 case E1000_VF_SET_MAC_ADDR:
5459 retval = -EINVAL;
5460 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5461 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5462 else
5463 dev_warn(&pdev->dev,
5464 "VF %d attempted to override administratively "
5465 "set MAC address\nReload the VF driver to "
5466 "resume operations\n", vf);
5467 break;
5468 case E1000_VF_SET_PROMISC:
5469 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5470 break;
5471 case E1000_VF_SET_MULTICAST:
5472 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5473 break;
5474 case E1000_VF_SET_LPE:
5475 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5476 break;
5477 case E1000_VF_SET_VLAN:
5478 retval = -1;
5479 if (vf_data->pf_vlan)
5480 dev_warn(&pdev->dev,
5481 "VF %d attempted to override administratively "
5482 "set VLAN tag\nReload the VF driver to "
5483 "resume operations\n", vf);
5484 else
5485 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5486 break;
5487 default:
5488 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5489 retval = -1;
5490 break;
5493 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5494 out:
5495 /* notify the VF of the results of what it sent us */
5496 if (retval)
5497 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5498 else
5499 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5501 igb_write_mbx(hw, msgbuf, 1, vf);
5504 static void igb_msg_task(struct igb_adapter *adapter)
5506 struct e1000_hw *hw = &adapter->hw;
5507 u32 vf;
5509 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5510 /* process any reset requests */
5511 if (!igb_check_for_rst(hw, vf))
5512 igb_vf_reset_event(adapter, vf);
5514 /* process any messages pending */
5515 if (!igb_check_for_msg(hw, vf))
5516 igb_rcv_msg_from_vf(adapter, vf);
5518 /* process any acks */
5519 if (!igb_check_for_ack(hw, vf))
5520 igb_rcv_ack_from_vf(adapter, vf);
5525 * igb_set_uta - Set unicast filter table address
5526 * @adapter: board private structure
5528 * The unicast table address is a register array of 32-bit registers.
5529 * The table is meant to be used in a way similar to how the MTA is used
5530 * however due to certain limitations in the hardware it is necessary to
5531 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5532 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5534 static void igb_set_uta(struct igb_adapter *adapter)
5536 struct e1000_hw *hw = &adapter->hw;
5537 int i;
5539 /* The UTA table only exists on 82576 hardware and newer */
5540 if (hw->mac.type < e1000_82576)
5541 return;
5543 /* we only need to do this if VMDq is enabled */
5544 if (!adapter->vfs_allocated_count)
5545 return;
5547 for (i = 0; i < hw->mac.uta_reg_count; i++)
5548 array_wr32(E1000_UTA, i, ~0);
5552 * igb_intr_msi - Interrupt Handler
5553 * @irq: interrupt number
5554 * @data: pointer to a network interface device structure
5556 static irqreturn_t igb_intr_msi(int irq, void *data)
5558 struct igb_adapter *adapter = data;
5559 struct igb_q_vector *q_vector = adapter->q_vector[0];
5560 struct e1000_hw *hw = &adapter->hw;
5561 /* read ICR disables interrupts using IAM */
5562 u32 icr = rd32(E1000_ICR);
5564 igb_write_itr(q_vector);
5566 if (icr & E1000_ICR_DRSTA)
5567 schedule_work(&adapter->reset_task);
5569 if (icr & E1000_ICR_DOUTSYNC) {
5570 /* HW is reporting DMA is out of sync */
5571 adapter->stats.doosync++;
5574 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5575 hw->mac.get_link_status = 1;
5576 if (!test_bit(__IGB_DOWN, &adapter->state))
5577 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5580 napi_schedule(&q_vector->napi);
5582 return IRQ_HANDLED;
5586 * igb_intr - Legacy Interrupt Handler
5587 * @irq: interrupt number
5588 * @data: pointer to a network interface device structure
5590 static irqreturn_t igb_intr(int irq, void *data)
5592 struct igb_adapter *adapter = data;
5593 struct igb_q_vector *q_vector = adapter->q_vector[0];
5594 struct e1000_hw *hw = &adapter->hw;
5595 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5596 * need for the IMC write */
5597 u32 icr = rd32(E1000_ICR);
5599 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5600 * not set, then the adapter didn't send an interrupt */
5601 if (!(icr & E1000_ICR_INT_ASSERTED))
5602 return IRQ_NONE;
5604 igb_write_itr(q_vector);
5606 if (icr & E1000_ICR_DRSTA)
5607 schedule_work(&adapter->reset_task);
5609 if (icr & E1000_ICR_DOUTSYNC) {
5610 /* HW is reporting DMA is out of sync */
5611 adapter->stats.doosync++;
5614 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5615 hw->mac.get_link_status = 1;
5616 /* guard against interrupt when we're going down */
5617 if (!test_bit(__IGB_DOWN, &adapter->state))
5618 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5621 napi_schedule(&q_vector->napi);
5623 return IRQ_HANDLED;
5626 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5628 struct igb_adapter *adapter = q_vector->adapter;
5629 struct e1000_hw *hw = &adapter->hw;
5631 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5632 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5633 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5634 igb_set_itr(q_vector);
5635 else
5636 igb_update_ring_itr(q_vector);
5639 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5640 if (adapter->msix_entries)
5641 wr32(E1000_EIMS, q_vector->eims_value);
5642 else
5643 igb_irq_enable(adapter);
5648 * igb_poll - NAPI Rx polling callback
5649 * @napi: napi polling structure
5650 * @budget: count of how many packets we should handle
5652 static int igb_poll(struct napi_struct *napi, int budget)
5654 struct igb_q_vector *q_vector = container_of(napi,
5655 struct igb_q_vector,
5656 napi);
5657 bool clean_complete = true;
5659 #ifdef CONFIG_IGB_DCA
5660 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5661 igb_update_dca(q_vector);
5662 #endif
5663 if (q_vector->tx.ring)
5664 clean_complete = igb_clean_tx_irq(q_vector);
5666 if (q_vector->rx.ring)
5667 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5669 /* If all work not completed, return budget and keep polling */
5670 if (!clean_complete)
5671 return budget;
5673 /* If not enough Rx work done, exit the polling mode */
5674 napi_complete(napi);
5675 igb_ring_irq_enable(q_vector);
5677 return 0;
5681 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5682 * @adapter: board private structure
5683 * @shhwtstamps: timestamp structure to update
5684 * @regval: unsigned 64bit system time value.
5686 * We need to convert the system time value stored in the RX/TXSTMP registers
5687 * into a hwtstamp which can be used by the upper level timestamping functions
5689 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5690 struct skb_shared_hwtstamps *shhwtstamps,
5691 u64 regval)
5693 u64 ns;
5696 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5697 * 24 to match clock shift we setup earlier.
5699 if (adapter->hw.mac.type >= e1000_82580)
5700 regval <<= IGB_82580_TSYNC_SHIFT;
5702 ns = timecounter_cyc2time(&adapter->clock, regval);
5703 timecompare_update(&adapter->compare, ns);
5704 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5705 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5706 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5710 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5711 * @q_vector: pointer to q_vector containing needed info
5712 * @buffer: pointer to igb_tx_buffer structure
5714 * If we were asked to do hardware stamping and such a time stamp is
5715 * available, then it must have been for this skb here because we only
5716 * allow only one such packet into the queue.
5718 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5719 struct igb_tx_buffer *buffer_info)
5721 struct igb_adapter *adapter = q_vector->adapter;
5722 struct e1000_hw *hw = &adapter->hw;
5723 struct skb_shared_hwtstamps shhwtstamps;
5724 u64 regval;
5726 /* if skb does not support hw timestamp or TX stamp not valid exit */
5727 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5728 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5729 return;
5731 regval = rd32(E1000_TXSTMPL);
5732 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5734 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5735 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5739 * igb_clean_tx_irq - Reclaim resources after transmit completes
5740 * @q_vector: pointer to q_vector containing needed info
5741 * returns true if ring is completely cleaned
5743 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5745 struct igb_adapter *adapter = q_vector->adapter;
5746 struct igb_ring *tx_ring = q_vector->tx.ring;
5747 struct igb_tx_buffer *tx_buffer;
5748 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5749 unsigned int total_bytes = 0, total_packets = 0;
5750 unsigned int budget = q_vector->tx.work_limit;
5751 unsigned int i = tx_ring->next_to_clean;
5753 if (test_bit(__IGB_DOWN, &adapter->state))
5754 return true;
5756 tx_buffer = &tx_ring->tx_buffer_info[i];
5757 tx_desc = IGB_TX_DESC(tx_ring, i);
5758 i -= tx_ring->count;
5760 for (; budget; budget--) {
5761 eop_desc = tx_buffer->next_to_watch;
5763 /* prevent any other reads prior to eop_desc */
5764 rmb();
5766 /* if next_to_watch is not set then there is no work pending */
5767 if (!eop_desc)
5768 break;
5770 /* if DD is not set pending work has not been completed */
5771 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5772 break;
5774 /* clear next_to_watch to prevent false hangs */
5775 tx_buffer->next_to_watch = NULL;
5777 /* update the statistics for this packet */
5778 total_bytes += tx_buffer->bytecount;
5779 total_packets += tx_buffer->gso_segs;
5781 /* retrieve hardware timestamp */
5782 igb_tx_hwtstamp(q_vector, tx_buffer);
5784 /* free the skb */
5785 dev_kfree_skb_any(tx_buffer->skb);
5786 tx_buffer->skb = NULL;
5788 /* unmap skb header data */
5789 dma_unmap_single(tx_ring->dev,
5790 tx_buffer->dma,
5791 tx_buffer->length,
5792 DMA_TO_DEVICE);
5794 /* clear last DMA location and unmap remaining buffers */
5795 while (tx_desc != eop_desc) {
5796 tx_buffer->dma = 0;
5798 tx_buffer++;
5799 tx_desc++;
5800 i++;
5801 if (unlikely(!i)) {
5802 i -= tx_ring->count;
5803 tx_buffer = tx_ring->tx_buffer_info;
5804 tx_desc = IGB_TX_DESC(tx_ring, 0);
5807 /* unmap any remaining paged data */
5808 if (tx_buffer->dma) {
5809 dma_unmap_page(tx_ring->dev,
5810 tx_buffer->dma,
5811 tx_buffer->length,
5812 DMA_TO_DEVICE);
5816 /* clear last DMA location */
5817 tx_buffer->dma = 0;
5819 /* move us one more past the eop_desc for start of next pkt */
5820 tx_buffer++;
5821 tx_desc++;
5822 i++;
5823 if (unlikely(!i)) {
5824 i -= tx_ring->count;
5825 tx_buffer = tx_ring->tx_buffer_info;
5826 tx_desc = IGB_TX_DESC(tx_ring, 0);
5830 netdev_tx_completed_queue(txring_txq(tx_ring),
5831 total_packets, total_bytes);
5832 i += tx_ring->count;
5833 tx_ring->next_to_clean = i;
5834 u64_stats_update_begin(&tx_ring->tx_syncp);
5835 tx_ring->tx_stats.bytes += total_bytes;
5836 tx_ring->tx_stats.packets += total_packets;
5837 u64_stats_update_end(&tx_ring->tx_syncp);
5838 q_vector->tx.total_bytes += total_bytes;
5839 q_vector->tx.total_packets += total_packets;
5841 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5842 struct e1000_hw *hw = &adapter->hw;
5844 eop_desc = tx_buffer->next_to_watch;
5846 /* Detect a transmit hang in hardware, this serializes the
5847 * check with the clearing of time_stamp and movement of i */
5848 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5849 if (eop_desc &&
5850 time_after(jiffies, tx_buffer->time_stamp +
5851 (adapter->tx_timeout_factor * HZ)) &&
5852 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5854 /* detected Tx unit hang */
5855 dev_err(tx_ring->dev,
5856 "Detected Tx Unit Hang\n"
5857 " Tx Queue <%d>\n"
5858 " TDH <%x>\n"
5859 " TDT <%x>\n"
5860 " next_to_use <%x>\n"
5861 " next_to_clean <%x>\n"
5862 "buffer_info[next_to_clean]\n"
5863 " time_stamp <%lx>\n"
5864 " next_to_watch <%p>\n"
5865 " jiffies <%lx>\n"
5866 " desc.status <%x>\n",
5867 tx_ring->queue_index,
5868 rd32(E1000_TDH(tx_ring->reg_idx)),
5869 readl(tx_ring->tail),
5870 tx_ring->next_to_use,
5871 tx_ring->next_to_clean,
5872 tx_buffer->time_stamp,
5873 eop_desc,
5874 jiffies,
5875 eop_desc->wb.status);
5876 netif_stop_subqueue(tx_ring->netdev,
5877 tx_ring->queue_index);
5879 /* we are about to reset, no point in enabling stuff */
5880 return true;
5884 if (unlikely(total_packets &&
5885 netif_carrier_ok(tx_ring->netdev) &&
5886 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5887 /* Make sure that anybody stopping the queue after this
5888 * sees the new next_to_clean.
5890 smp_mb();
5891 if (__netif_subqueue_stopped(tx_ring->netdev,
5892 tx_ring->queue_index) &&
5893 !(test_bit(__IGB_DOWN, &adapter->state))) {
5894 netif_wake_subqueue(tx_ring->netdev,
5895 tx_ring->queue_index);
5897 u64_stats_update_begin(&tx_ring->tx_syncp);
5898 tx_ring->tx_stats.restart_queue++;
5899 u64_stats_update_end(&tx_ring->tx_syncp);
5903 return !!budget;
5906 static inline void igb_rx_checksum(struct igb_ring *ring,
5907 union e1000_adv_rx_desc *rx_desc,
5908 struct sk_buff *skb)
5910 skb_checksum_none_assert(skb);
5912 /* Ignore Checksum bit is set */
5913 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5914 return;
5916 /* Rx checksum disabled via ethtool */
5917 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5918 return;
5920 /* TCP/UDP checksum error bit is set */
5921 if (igb_test_staterr(rx_desc,
5922 E1000_RXDEXT_STATERR_TCPE |
5923 E1000_RXDEXT_STATERR_IPE)) {
5925 * work around errata with sctp packets where the TCPE aka
5926 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5927 * packets, (aka let the stack check the crc32c)
5929 if (!((skb->len == 60) &&
5930 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5931 u64_stats_update_begin(&ring->rx_syncp);
5932 ring->rx_stats.csum_err++;
5933 u64_stats_update_end(&ring->rx_syncp);
5935 /* let the stack verify checksum errors */
5936 return;
5938 /* It must be a TCP or UDP packet with a valid checksum */
5939 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5940 E1000_RXD_STAT_UDPCS))
5941 skb->ip_summed = CHECKSUM_UNNECESSARY;
5943 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5944 le32_to_cpu(rx_desc->wb.upper.status_error));
5947 static inline void igb_rx_hash(struct igb_ring *ring,
5948 union e1000_adv_rx_desc *rx_desc,
5949 struct sk_buff *skb)
5951 if (ring->netdev->features & NETIF_F_RXHASH)
5952 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5955 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5956 union e1000_adv_rx_desc *rx_desc,
5957 struct sk_buff *skb)
5959 struct igb_adapter *adapter = q_vector->adapter;
5960 struct e1000_hw *hw = &adapter->hw;
5961 u64 regval;
5963 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
5964 E1000_RXDADV_STAT_TS))
5965 return;
5968 * If this bit is set, then the RX registers contain the time stamp. No
5969 * other packet will be time stamped until we read these registers, so
5970 * read the registers to make them available again. Because only one
5971 * packet can be time stamped at a time, we know that the register
5972 * values must belong to this one here and therefore we don't need to
5973 * compare any of the additional attributes stored for it.
5975 * If nothing went wrong, then it should have a shared tx_flags that we
5976 * can turn into a skb_shared_hwtstamps.
5978 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
5979 u32 *stamp = (u32 *)skb->data;
5980 regval = le32_to_cpu(*(stamp + 2));
5981 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5982 skb_pull(skb, IGB_TS_HDR_LEN);
5983 } else {
5984 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5985 return;
5987 regval = rd32(E1000_RXSTMPL);
5988 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5991 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5994 static void igb_rx_vlan(struct igb_ring *ring,
5995 union e1000_adv_rx_desc *rx_desc,
5996 struct sk_buff *skb)
5998 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
5999 u16 vid;
6000 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6001 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6002 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6003 else
6004 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6006 __vlan_hwaccel_put_tag(skb, vid);
6010 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6012 /* HW will not DMA in data larger than the given buffer, even if it
6013 * parses the (NFS, of course) header to be larger. In that case, it
6014 * fills the header buffer and spills the rest into the page.
6016 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6017 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6018 if (hlen > IGB_RX_HDR_LEN)
6019 hlen = IGB_RX_HDR_LEN;
6020 return hlen;
6023 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6025 struct igb_ring *rx_ring = q_vector->rx.ring;
6026 union e1000_adv_rx_desc *rx_desc;
6027 const int current_node = numa_node_id();
6028 unsigned int total_bytes = 0, total_packets = 0;
6029 u16 cleaned_count = igb_desc_unused(rx_ring);
6030 u16 i = rx_ring->next_to_clean;
6032 rx_desc = IGB_RX_DESC(rx_ring, i);
6034 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6035 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6036 struct sk_buff *skb = buffer_info->skb;
6037 union e1000_adv_rx_desc *next_rxd;
6039 buffer_info->skb = NULL;
6040 prefetch(skb->data);
6042 i++;
6043 if (i == rx_ring->count)
6044 i = 0;
6046 next_rxd = IGB_RX_DESC(rx_ring, i);
6047 prefetch(next_rxd);
6050 * This memory barrier is needed to keep us from reading
6051 * any other fields out of the rx_desc until we know the
6052 * RXD_STAT_DD bit is set
6054 rmb();
6056 if (!skb_is_nonlinear(skb)) {
6057 __skb_put(skb, igb_get_hlen(rx_desc));
6058 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6059 IGB_RX_HDR_LEN,
6060 DMA_FROM_DEVICE);
6061 buffer_info->dma = 0;
6064 if (rx_desc->wb.upper.length) {
6065 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6067 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6068 buffer_info->page,
6069 buffer_info->page_offset,
6070 length);
6072 skb->len += length;
6073 skb->data_len += length;
6074 skb->truesize += PAGE_SIZE / 2;
6076 if ((page_count(buffer_info->page) != 1) ||
6077 (page_to_nid(buffer_info->page) != current_node))
6078 buffer_info->page = NULL;
6079 else
6080 get_page(buffer_info->page);
6082 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6083 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6084 buffer_info->page_dma = 0;
6087 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6088 struct igb_rx_buffer *next_buffer;
6089 next_buffer = &rx_ring->rx_buffer_info[i];
6090 buffer_info->skb = next_buffer->skb;
6091 buffer_info->dma = next_buffer->dma;
6092 next_buffer->skb = skb;
6093 next_buffer->dma = 0;
6094 goto next_desc;
6097 if (igb_test_staterr(rx_desc,
6098 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
6099 dev_kfree_skb_any(skb);
6100 goto next_desc;
6103 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6104 igb_rx_hash(rx_ring, rx_desc, skb);
6105 igb_rx_checksum(rx_ring, rx_desc, skb);
6106 igb_rx_vlan(rx_ring, rx_desc, skb);
6108 total_bytes += skb->len;
6109 total_packets++;
6111 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6113 napi_gro_receive(&q_vector->napi, skb);
6115 budget--;
6116 next_desc:
6117 if (!budget)
6118 break;
6120 cleaned_count++;
6121 /* return some buffers to hardware, one at a time is too slow */
6122 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6123 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6124 cleaned_count = 0;
6127 /* use prefetched values */
6128 rx_desc = next_rxd;
6131 rx_ring->next_to_clean = i;
6132 u64_stats_update_begin(&rx_ring->rx_syncp);
6133 rx_ring->rx_stats.packets += total_packets;
6134 rx_ring->rx_stats.bytes += total_bytes;
6135 u64_stats_update_end(&rx_ring->rx_syncp);
6136 q_vector->rx.total_packets += total_packets;
6137 q_vector->rx.total_bytes += total_bytes;
6139 if (cleaned_count)
6140 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6142 return !!budget;
6145 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6146 struct igb_rx_buffer *bi)
6148 struct sk_buff *skb = bi->skb;
6149 dma_addr_t dma = bi->dma;
6151 if (dma)
6152 return true;
6154 if (likely(!skb)) {
6155 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6156 IGB_RX_HDR_LEN);
6157 bi->skb = skb;
6158 if (!skb) {
6159 rx_ring->rx_stats.alloc_failed++;
6160 return false;
6163 /* initialize skb for ring */
6164 skb_record_rx_queue(skb, rx_ring->queue_index);
6167 dma = dma_map_single(rx_ring->dev, skb->data,
6168 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6170 if (dma_mapping_error(rx_ring->dev, dma)) {
6171 rx_ring->rx_stats.alloc_failed++;
6172 return false;
6175 bi->dma = dma;
6176 return true;
6179 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6180 struct igb_rx_buffer *bi)
6182 struct page *page = bi->page;
6183 dma_addr_t page_dma = bi->page_dma;
6184 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6186 if (page_dma)
6187 return true;
6189 if (!page) {
6190 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6191 bi->page = page;
6192 if (unlikely(!page)) {
6193 rx_ring->rx_stats.alloc_failed++;
6194 return false;
6198 page_dma = dma_map_page(rx_ring->dev, page,
6199 page_offset, PAGE_SIZE / 2,
6200 DMA_FROM_DEVICE);
6202 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6203 rx_ring->rx_stats.alloc_failed++;
6204 return false;
6207 bi->page_dma = page_dma;
6208 bi->page_offset = page_offset;
6209 return true;
6213 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6214 * @adapter: address of board private structure
6216 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6218 union e1000_adv_rx_desc *rx_desc;
6219 struct igb_rx_buffer *bi;
6220 u16 i = rx_ring->next_to_use;
6222 rx_desc = IGB_RX_DESC(rx_ring, i);
6223 bi = &rx_ring->rx_buffer_info[i];
6224 i -= rx_ring->count;
6226 while (cleaned_count--) {
6227 if (!igb_alloc_mapped_skb(rx_ring, bi))
6228 break;
6230 /* Refresh the desc even if buffer_addrs didn't change
6231 * because each write-back erases this info. */
6232 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6234 if (!igb_alloc_mapped_page(rx_ring, bi))
6235 break;
6237 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6239 rx_desc++;
6240 bi++;
6241 i++;
6242 if (unlikely(!i)) {
6243 rx_desc = IGB_RX_DESC(rx_ring, 0);
6244 bi = rx_ring->rx_buffer_info;
6245 i -= rx_ring->count;
6248 /* clear the hdr_addr for the next_to_use descriptor */
6249 rx_desc->read.hdr_addr = 0;
6252 i += rx_ring->count;
6254 if (rx_ring->next_to_use != i) {
6255 rx_ring->next_to_use = i;
6257 /* Force memory writes to complete before letting h/w
6258 * know there are new descriptors to fetch. (Only
6259 * applicable for weak-ordered memory model archs,
6260 * such as IA-64). */
6261 wmb();
6262 writel(i, rx_ring->tail);
6267 * igb_mii_ioctl -
6268 * @netdev:
6269 * @ifreq:
6270 * @cmd:
6272 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6274 struct igb_adapter *adapter = netdev_priv(netdev);
6275 struct mii_ioctl_data *data = if_mii(ifr);
6277 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6278 return -EOPNOTSUPP;
6280 switch (cmd) {
6281 case SIOCGMIIPHY:
6282 data->phy_id = adapter->hw.phy.addr;
6283 break;
6284 case SIOCGMIIREG:
6285 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6286 &data->val_out))
6287 return -EIO;
6288 break;
6289 case SIOCSMIIREG:
6290 default:
6291 return -EOPNOTSUPP;
6293 return 0;
6297 * igb_hwtstamp_ioctl - control hardware time stamping
6298 * @netdev:
6299 * @ifreq:
6300 * @cmd:
6302 * Outgoing time stamping can be enabled and disabled. Play nice and
6303 * disable it when requested, although it shouldn't case any overhead
6304 * when no packet needs it. At most one packet in the queue may be
6305 * marked for time stamping, otherwise it would be impossible to tell
6306 * for sure to which packet the hardware time stamp belongs.
6308 * Incoming time stamping has to be configured via the hardware
6309 * filters. Not all combinations are supported, in particular event
6310 * type has to be specified. Matching the kind of event packet is
6311 * not supported, with the exception of "all V2 events regardless of
6312 * level 2 or 4".
6315 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6316 struct ifreq *ifr, int cmd)
6318 struct igb_adapter *adapter = netdev_priv(netdev);
6319 struct e1000_hw *hw = &adapter->hw;
6320 struct hwtstamp_config config;
6321 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6322 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6323 u32 tsync_rx_cfg = 0;
6324 bool is_l4 = false;
6325 bool is_l2 = false;
6326 u32 regval;
6328 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6329 return -EFAULT;
6331 /* reserved for future extensions */
6332 if (config.flags)
6333 return -EINVAL;
6335 switch (config.tx_type) {
6336 case HWTSTAMP_TX_OFF:
6337 tsync_tx_ctl = 0;
6338 case HWTSTAMP_TX_ON:
6339 break;
6340 default:
6341 return -ERANGE;
6344 switch (config.rx_filter) {
6345 case HWTSTAMP_FILTER_NONE:
6346 tsync_rx_ctl = 0;
6347 break;
6348 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6349 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6350 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6351 case HWTSTAMP_FILTER_ALL:
6353 * register TSYNCRXCFG must be set, therefore it is not
6354 * possible to time stamp both Sync and Delay_Req messages
6355 * => fall back to time stamping all packets
6357 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6358 config.rx_filter = HWTSTAMP_FILTER_ALL;
6359 break;
6360 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6361 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6362 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6363 is_l4 = true;
6364 break;
6365 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6366 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6367 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6368 is_l4 = true;
6369 break;
6370 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6371 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6372 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6373 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6374 is_l2 = true;
6375 is_l4 = true;
6376 config.rx_filter = HWTSTAMP_FILTER_SOME;
6377 break;
6378 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6379 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6380 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6381 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6382 is_l2 = true;
6383 is_l4 = true;
6384 config.rx_filter = HWTSTAMP_FILTER_SOME;
6385 break;
6386 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6387 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6388 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6389 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6390 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6391 is_l2 = true;
6392 is_l4 = true;
6393 break;
6394 default:
6395 return -ERANGE;
6398 if (hw->mac.type == e1000_82575) {
6399 if (tsync_rx_ctl | tsync_tx_ctl)
6400 return -EINVAL;
6401 return 0;
6405 * Per-packet timestamping only works if all packets are
6406 * timestamped, so enable timestamping in all packets as
6407 * long as one rx filter was configured.
6409 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6410 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6411 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6414 /* enable/disable TX */
6415 regval = rd32(E1000_TSYNCTXCTL);
6416 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6417 regval |= tsync_tx_ctl;
6418 wr32(E1000_TSYNCTXCTL, regval);
6420 /* enable/disable RX */
6421 regval = rd32(E1000_TSYNCRXCTL);
6422 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6423 regval |= tsync_rx_ctl;
6424 wr32(E1000_TSYNCRXCTL, regval);
6426 /* define which PTP packets are time stamped */
6427 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6429 /* define ethertype filter for timestamped packets */
6430 if (is_l2)
6431 wr32(E1000_ETQF(3),
6432 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6433 E1000_ETQF_1588 | /* enable timestamping */
6434 ETH_P_1588)); /* 1588 eth protocol type */
6435 else
6436 wr32(E1000_ETQF(3), 0);
6438 #define PTP_PORT 319
6439 /* L4 Queue Filter[3]: filter by destination port and protocol */
6440 if (is_l4) {
6441 u32 ftqf = (IPPROTO_UDP /* UDP */
6442 | E1000_FTQF_VF_BP /* VF not compared */
6443 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6444 | E1000_FTQF_MASK); /* mask all inputs */
6445 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6447 wr32(E1000_IMIR(3), htons(PTP_PORT));
6448 wr32(E1000_IMIREXT(3),
6449 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6450 if (hw->mac.type == e1000_82576) {
6451 /* enable source port check */
6452 wr32(E1000_SPQF(3), htons(PTP_PORT));
6453 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6455 wr32(E1000_FTQF(3), ftqf);
6456 } else {
6457 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6459 wrfl();
6461 adapter->hwtstamp_config = config;
6463 /* clear TX/RX time stamp registers, just to be sure */
6464 regval = rd32(E1000_TXSTMPH);
6465 regval = rd32(E1000_RXSTMPH);
6467 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6468 -EFAULT : 0;
6472 * igb_ioctl -
6473 * @netdev:
6474 * @ifreq:
6475 * @cmd:
6477 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6479 switch (cmd) {
6480 case SIOCGMIIPHY:
6481 case SIOCGMIIREG:
6482 case SIOCSMIIREG:
6483 return igb_mii_ioctl(netdev, ifr, cmd);
6484 case SIOCSHWTSTAMP:
6485 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6486 default:
6487 return -EOPNOTSUPP;
6491 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6493 struct igb_adapter *adapter = hw->back;
6494 u16 cap_offset;
6496 cap_offset = adapter->pdev->pcie_cap;
6497 if (!cap_offset)
6498 return -E1000_ERR_CONFIG;
6500 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6502 return 0;
6505 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6507 struct igb_adapter *adapter = hw->back;
6508 u16 cap_offset;
6510 cap_offset = adapter->pdev->pcie_cap;
6511 if (!cap_offset)
6512 return -E1000_ERR_CONFIG;
6514 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6516 return 0;
6519 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6521 struct igb_adapter *adapter = netdev_priv(netdev);
6522 struct e1000_hw *hw = &adapter->hw;
6523 u32 ctrl, rctl;
6524 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6526 if (enable) {
6527 /* enable VLAN tag insert/strip */
6528 ctrl = rd32(E1000_CTRL);
6529 ctrl |= E1000_CTRL_VME;
6530 wr32(E1000_CTRL, ctrl);
6532 /* Disable CFI check */
6533 rctl = rd32(E1000_RCTL);
6534 rctl &= ~E1000_RCTL_CFIEN;
6535 wr32(E1000_RCTL, rctl);
6536 } else {
6537 /* disable VLAN tag insert/strip */
6538 ctrl = rd32(E1000_CTRL);
6539 ctrl &= ~E1000_CTRL_VME;
6540 wr32(E1000_CTRL, ctrl);
6543 igb_rlpml_set(adapter);
6546 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6548 struct igb_adapter *adapter = netdev_priv(netdev);
6549 struct e1000_hw *hw = &adapter->hw;
6550 int pf_id = adapter->vfs_allocated_count;
6552 /* attempt to add filter to vlvf array */
6553 igb_vlvf_set(adapter, vid, true, pf_id);
6555 /* add the filter since PF can receive vlans w/o entry in vlvf */
6556 igb_vfta_set(hw, vid, true);
6558 set_bit(vid, adapter->active_vlans);
6560 return 0;
6563 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6565 struct igb_adapter *adapter = netdev_priv(netdev);
6566 struct e1000_hw *hw = &adapter->hw;
6567 int pf_id = adapter->vfs_allocated_count;
6568 s32 err;
6570 /* remove vlan from VLVF table array */
6571 err = igb_vlvf_set(adapter, vid, false, pf_id);
6573 /* if vid was not present in VLVF just remove it from table */
6574 if (err)
6575 igb_vfta_set(hw, vid, false);
6577 clear_bit(vid, adapter->active_vlans);
6579 return 0;
6582 static void igb_restore_vlan(struct igb_adapter *adapter)
6584 u16 vid;
6586 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6588 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6589 igb_vlan_rx_add_vid(adapter->netdev, vid);
6592 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6594 struct pci_dev *pdev = adapter->pdev;
6595 struct e1000_mac_info *mac = &adapter->hw.mac;
6597 mac->autoneg = 0;
6599 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6600 * for the switch() below to work */
6601 if ((spd & 1) || (dplx & ~1))
6602 goto err_inval;
6604 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6605 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6606 spd != SPEED_1000 &&
6607 dplx != DUPLEX_FULL)
6608 goto err_inval;
6610 switch (spd + dplx) {
6611 case SPEED_10 + DUPLEX_HALF:
6612 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6613 break;
6614 case SPEED_10 + DUPLEX_FULL:
6615 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6616 break;
6617 case SPEED_100 + DUPLEX_HALF:
6618 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6619 break;
6620 case SPEED_100 + DUPLEX_FULL:
6621 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6622 break;
6623 case SPEED_1000 + DUPLEX_FULL:
6624 mac->autoneg = 1;
6625 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6626 break;
6627 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6628 default:
6629 goto err_inval;
6631 return 0;
6633 err_inval:
6634 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6635 return -EINVAL;
6638 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6639 bool runtime)
6641 struct net_device *netdev = pci_get_drvdata(pdev);
6642 struct igb_adapter *adapter = netdev_priv(netdev);
6643 struct e1000_hw *hw = &adapter->hw;
6644 u32 ctrl, rctl, status;
6645 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6646 #ifdef CONFIG_PM
6647 int retval = 0;
6648 #endif
6650 netif_device_detach(netdev);
6652 if (netif_running(netdev))
6653 __igb_close(netdev, true);
6655 igb_clear_interrupt_scheme(adapter);
6657 #ifdef CONFIG_PM
6658 retval = pci_save_state(pdev);
6659 if (retval)
6660 return retval;
6661 #endif
6663 status = rd32(E1000_STATUS);
6664 if (status & E1000_STATUS_LU)
6665 wufc &= ~E1000_WUFC_LNKC;
6667 if (wufc) {
6668 igb_setup_rctl(adapter);
6669 igb_set_rx_mode(netdev);
6671 /* turn on all-multi mode if wake on multicast is enabled */
6672 if (wufc & E1000_WUFC_MC) {
6673 rctl = rd32(E1000_RCTL);
6674 rctl |= E1000_RCTL_MPE;
6675 wr32(E1000_RCTL, rctl);
6678 ctrl = rd32(E1000_CTRL);
6679 /* advertise wake from D3Cold */
6680 #define E1000_CTRL_ADVD3WUC 0x00100000
6681 /* phy power management enable */
6682 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6683 ctrl |= E1000_CTRL_ADVD3WUC;
6684 wr32(E1000_CTRL, ctrl);
6686 /* Allow time for pending master requests to run */
6687 igb_disable_pcie_master(hw);
6689 wr32(E1000_WUC, E1000_WUC_PME_EN);
6690 wr32(E1000_WUFC, wufc);
6691 } else {
6692 wr32(E1000_WUC, 0);
6693 wr32(E1000_WUFC, 0);
6696 *enable_wake = wufc || adapter->en_mng_pt;
6697 if (!*enable_wake)
6698 igb_power_down_link(adapter);
6699 else
6700 igb_power_up_link(adapter);
6702 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6703 * would have already happened in close and is redundant. */
6704 igb_release_hw_control(adapter);
6706 pci_disable_device(pdev);
6708 return 0;
6711 #ifdef CONFIG_PM
6712 static int igb_suspend(struct device *dev)
6714 int retval;
6715 bool wake;
6716 struct pci_dev *pdev = to_pci_dev(dev);
6718 retval = __igb_shutdown(pdev, &wake, 0);
6719 if (retval)
6720 return retval;
6722 if (wake) {
6723 pci_prepare_to_sleep(pdev);
6724 } else {
6725 pci_wake_from_d3(pdev, false);
6726 pci_set_power_state(pdev, PCI_D3hot);
6729 return 0;
6732 static int igb_resume(struct device *dev)
6734 struct pci_dev *pdev = to_pci_dev(dev);
6735 struct net_device *netdev = pci_get_drvdata(pdev);
6736 struct igb_adapter *adapter = netdev_priv(netdev);
6737 struct e1000_hw *hw = &adapter->hw;
6738 u32 err;
6740 pci_set_power_state(pdev, PCI_D0);
6741 pci_restore_state(pdev);
6742 pci_save_state(pdev);
6744 err = pci_enable_device_mem(pdev);
6745 if (err) {
6746 dev_err(&pdev->dev,
6747 "igb: Cannot enable PCI device from suspend\n");
6748 return err;
6750 pci_set_master(pdev);
6752 pci_enable_wake(pdev, PCI_D3hot, 0);
6753 pci_enable_wake(pdev, PCI_D3cold, 0);
6755 if (!rtnl_is_locked()) {
6757 * shut up ASSERT_RTNL() warning in
6758 * netif_set_real_num_tx/rx_queues.
6760 rtnl_lock();
6761 err = igb_init_interrupt_scheme(adapter);
6762 rtnl_unlock();
6763 } else {
6764 err = igb_init_interrupt_scheme(adapter);
6766 if (err) {
6767 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6768 return -ENOMEM;
6771 igb_reset(adapter);
6773 /* let the f/w know that the h/w is now under the control of the
6774 * driver. */
6775 igb_get_hw_control(adapter);
6777 wr32(E1000_WUS, ~0);
6779 if (netdev->flags & IFF_UP) {
6780 err = __igb_open(netdev, true);
6781 if (err)
6782 return err;
6785 netif_device_attach(netdev);
6786 return 0;
6789 #ifdef CONFIG_PM_RUNTIME
6790 static int igb_runtime_idle(struct device *dev)
6792 struct pci_dev *pdev = to_pci_dev(dev);
6793 struct net_device *netdev = pci_get_drvdata(pdev);
6794 struct igb_adapter *adapter = netdev_priv(netdev);
6796 if (!igb_has_link(adapter))
6797 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6799 return -EBUSY;
6802 static int igb_runtime_suspend(struct device *dev)
6804 struct pci_dev *pdev = to_pci_dev(dev);
6805 int retval;
6806 bool wake;
6808 retval = __igb_shutdown(pdev, &wake, 1);
6809 if (retval)
6810 return retval;
6812 if (wake) {
6813 pci_prepare_to_sleep(pdev);
6814 } else {
6815 pci_wake_from_d3(pdev, false);
6816 pci_set_power_state(pdev, PCI_D3hot);
6819 return 0;
/**
 * igb_runtime_resume - runtime PM resume callback
 * @dev: device being resumed
 *
 * Delegates to igb_resume() and returns its result.
 */
static int igb_runtime_resume(struct device *dev)
{
	return igb_resume(dev);
}
6826 #endif /* CONFIG_PM_RUNTIME */
6827 #endif
6829 static void igb_shutdown(struct pci_dev *pdev)
6831 bool wake;
6833 __igb_shutdown(pdev, &wake, 0);
6835 if (system_state == SYSTEM_POWER_OFF) {
6836 pci_wake_from_d3(pdev, wake);
6837 pci_set_power_state(pdev, PCI_D3hot);
6841 #ifdef CONFIG_NET_POLL_CONTROLLER
6843 * Polling 'interrupt' - used by things like netconsole to send skbs
6844 * without having to re-enable interrupts. It's not called while
6845 * the interrupt routine is executing.
6847 static void igb_netpoll(struct net_device *netdev)
6849 struct igb_adapter *adapter = netdev_priv(netdev);
6850 struct e1000_hw *hw = &adapter->hw;
6851 struct igb_q_vector *q_vector;
6852 int i;
6854 for (i = 0; i < adapter->num_q_vectors; i++) {
6855 q_vector = adapter->q_vector[i];
6856 if (adapter->msix_entries)
6857 wr32(E1000_EIMC, q_vector->eims_value);
6858 else
6859 igb_irq_disable(adapter);
6860 napi_schedule(&q_vector->napi);
6863 #endif /* CONFIG_NET_POLL_CONTROLLER */
6866 * igb_io_error_detected - called when PCI error is detected
6867 * @pdev: Pointer to PCI device
6868 * @state: The current pci connection state
6870 * This function is called after a PCI bus error affecting
6871 * this device has been detected.
6873 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6874 pci_channel_state_t state)
6876 struct net_device *netdev = pci_get_drvdata(pdev);
6877 struct igb_adapter *adapter = netdev_priv(netdev);
6879 netif_device_detach(netdev);
6881 if (state == pci_channel_io_perm_failure)
6882 return PCI_ERS_RESULT_DISCONNECT;
6884 if (netif_running(netdev))
6885 igb_down(adapter);
6886 pci_disable_device(pdev);
6888 /* Request a slot slot reset. */
6889 return PCI_ERS_RESULT_NEED_RESET;
6893 * igb_io_slot_reset - called after the pci bus has been reset.
6894 * @pdev: Pointer to PCI device
6896 * Restart the card from scratch, as if from a cold-boot. Implementation
6897 * resembles the first-half of the igb_resume routine.
6899 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6901 struct net_device *netdev = pci_get_drvdata(pdev);
6902 struct igb_adapter *adapter = netdev_priv(netdev);
6903 struct e1000_hw *hw = &adapter->hw;
6904 pci_ers_result_t result;
6905 int err;
6907 if (pci_enable_device_mem(pdev)) {
6908 dev_err(&pdev->dev,
6909 "Cannot re-enable PCI device after reset.\n");
6910 result = PCI_ERS_RESULT_DISCONNECT;
6911 } else {
6912 pci_set_master(pdev);
6913 pci_restore_state(pdev);
6914 pci_save_state(pdev);
6916 pci_enable_wake(pdev, PCI_D3hot, 0);
6917 pci_enable_wake(pdev, PCI_D3cold, 0);
6919 igb_reset(adapter);
6920 wr32(E1000_WUS, ~0);
6921 result = PCI_ERS_RESULT_RECOVERED;
6924 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6925 if (err) {
6926 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6927 "failed 0x%0x\n", err);
6928 /* non-fatal, continue */
6931 return result;
6935 * igb_io_resume - called when traffic can start flowing again.
6936 * @pdev: Pointer to PCI device
6938 * This callback is called when the error recovery driver tells us that
6939 * its OK to resume normal operation. Implementation resembles the
6940 * second-half of the igb_resume routine.
6942 static void igb_io_resume(struct pci_dev *pdev)
6944 struct net_device *netdev = pci_get_drvdata(pdev);
6945 struct igb_adapter *adapter = netdev_priv(netdev);
6947 if (netif_running(netdev)) {
6948 if (igb_up(adapter)) {
6949 dev_err(&pdev->dev, "igb_up failed after reset\n");
6950 return;
6954 netif_device_attach(netdev);
6956 /* let the f/w know that the h/w is now under the control of the
6957 * driver. */
6958 igb_get_hw_control(adapter);
6961 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6962 u8 qsel)
6964 u32 rar_low, rar_high;
6965 struct e1000_hw *hw = &adapter->hw;
6967 /* HW expects these in little endian so we reverse the byte order
6968 * from network order (big endian) to little endian
6970 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6971 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6972 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6974 /* Indicate to hardware the Address is Valid. */
6975 rar_high |= E1000_RAH_AV;
6977 if (hw->mac.type == e1000_82575)
6978 rar_high |= E1000_RAH_POOL_1 * qsel;
6979 else
6980 rar_high |= E1000_RAH_POOL_1 << qsel;
6982 wr32(E1000_RAL(index), rar_low);
6983 wrfl();
6984 wr32(E1000_RAH(index), rar_high);
6985 wrfl();
6988 static int igb_set_vf_mac(struct igb_adapter *adapter,
6989 int vf, unsigned char *mac_addr)
6991 struct e1000_hw *hw = &adapter->hw;
6992 /* VF MAC addresses start at end of receive addresses and moves
6993 * torwards the first, as a result a collision should not be possible */
6994 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6996 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6998 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7000 return 0;
7003 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7005 struct igb_adapter *adapter = netdev_priv(netdev);
7006 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7007 return -EINVAL;
7008 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7009 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7010 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7011 " change effective.");
7012 if (test_bit(__IGB_DOWN, &adapter->state)) {
7013 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7014 " but the PF device is not up.\n");
7015 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7016 " attempting to use the VF device.\n");
7018 return igb_set_vf_mac(adapter, vf, mac);
7021 static int igb_link_mbps(int internal_link_speed)
7023 switch (internal_link_speed) {
7024 case SPEED_100:
7025 return 100;
7026 case SPEED_1000:
7027 return 1000;
7028 default:
7029 return 0;
7033 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7034 int link_speed)
7036 int rf_dec, rf_int;
7037 u32 bcnrc_val;
7039 if (tx_rate != 0) {
7040 /* Calculate the rate factor values to set */
7041 rf_int = link_speed / tx_rate;
7042 rf_dec = (link_speed - (rf_int * tx_rate));
7043 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7045 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7046 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7047 E1000_RTTBCNRC_RF_INT_MASK);
7048 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7049 } else {
7050 bcnrc_val = 0;
7053 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7054 wr32(E1000_RTTBCNRC, bcnrc_val);
7057 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7059 int actual_link_speed, i;
7060 bool reset_rate = false;
7062 /* VF TX rate limit was not set or not supported */
7063 if ((adapter->vf_rate_link_speed == 0) ||
7064 (adapter->hw.mac.type != e1000_82576))
7065 return;
7067 actual_link_speed = igb_link_mbps(adapter->link_speed);
7068 if (actual_link_speed != adapter->vf_rate_link_speed) {
7069 reset_rate = true;
7070 adapter->vf_rate_link_speed = 0;
7071 dev_info(&adapter->pdev->dev,
7072 "Link speed has been changed. VF Transmit "
7073 "rate is disabled\n");
7076 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7077 if (reset_rate)
7078 adapter->vf_data[i].tx_rate = 0;
7080 igb_set_vf_rate_limit(&adapter->hw, i,
7081 adapter->vf_data[i].tx_rate,
7082 actual_link_speed);
7086 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7088 struct igb_adapter *adapter = netdev_priv(netdev);
7089 struct e1000_hw *hw = &adapter->hw;
7090 int actual_link_speed;
7092 if (hw->mac.type != e1000_82576)
7093 return -EOPNOTSUPP;
7095 actual_link_speed = igb_link_mbps(adapter->link_speed);
7096 if ((vf >= adapter->vfs_allocated_count) ||
7097 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7098 (tx_rate < 0) || (tx_rate > actual_link_speed))
7099 return -EINVAL;
7101 adapter->vf_rate_link_speed = actual_link_speed;
7102 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7103 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7105 return 0;
7108 static int igb_ndo_get_vf_config(struct net_device *netdev,
7109 int vf, struct ifla_vf_info *ivi)
7111 struct igb_adapter *adapter = netdev_priv(netdev);
7112 if (vf >= adapter->vfs_allocated_count)
7113 return -EINVAL;
7114 ivi->vf = vf;
7115 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7116 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7117 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7118 ivi->qos = adapter->vf_data[vf].pf_qos;
7119 return 0;
7122 static void igb_vmm_control(struct igb_adapter *adapter)
7124 struct e1000_hw *hw = &adapter->hw;
7125 u32 reg;
7127 switch (hw->mac.type) {
7128 case e1000_82575:
7129 default:
7130 /* replication is not supported for 82575 */
7131 return;
7132 case e1000_82576:
7133 /* notify HW that the MAC is adding vlan tags */
7134 reg = rd32(E1000_DTXCTL);
7135 reg |= E1000_DTXCTL_VLAN_ADDED;
7136 wr32(E1000_DTXCTL, reg);
7137 case e1000_82580:
7138 /* enable replication vlan tag stripping */
7139 reg = rd32(E1000_RPLOLR);
7140 reg |= E1000_RPLOLR_STRVLAN;
7141 wr32(E1000_RPLOLR, reg);
7142 case e1000_i350:
7143 /* none of the above registers are supported by i350 */
7144 break;
7147 if (adapter->vfs_allocated_count) {
7148 igb_vmdq_set_loopback_pf(hw, true);
7149 igb_vmdq_set_replication_pf(hw, true);
7150 igb_vmdq_set_anti_spoofing_pf(hw, true,
7151 adapter->vfs_allocated_count);
7152 } else {
7153 igb_vmdq_set_loopback_pf(hw, false);
7154 igb_vmdq_set_replication_pf(hw, false);
7158 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7160 struct e1000_hw *hw = &adapter->hw;
7161 u32 dmac_thr;
7162 u16 hwm;
7164 if (hw->mac.type > e1000_82580) {
7165 if (adapter->flags & IGB_FLAG_DMAC) {
7166 u32 reg;
7168 /* force threshold to 0. */
7169 wr32(E1000_DMCTXTH, 0);
7172 * DMA Coalescing high water mark needs to be greater
7173 * than the Rx threshold. Set hwm to PBA - max frame
7174 * size in 16B units, capping it at PBA - 6KB.
7176 hwm = 64 * pba - adapter->max_frame_size / 16;
7177 if (hwm < 64 * (pba - 6))
7178 hwm = 64 * (pba - 6);
7179 reg = rd32(E1000_FCRTC);
7180 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7181 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7182 & E1000_FCRTC_RTH_COAL_MASK);
7183 wr32(E1000_FCRTC, reg);
7186 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7187 * frame size, capping it at PBA - 10KB.
7189 dmac_thr = pba - adapter->max_frame_size / 512;
7190 if (dmac_thr < pba - 10)
7191 dmac_thr = pba - 10;
7192 reg = rd32(E1000_DMACR);
7193 reg &= ~E1000_DMACR_DMACTHR_MASK;
7194 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7195 & E1000_DMACR_DMACTHR_MASK);
7197 /* transition to L0x or L1 if available..*/
7198 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7200 /* watchdog timer= +-1000 usec in 32usec intervals */
7201 reg |= (1000 >> 5);
7202 wr32(E1000_DMACR, reg);
7205 * no lower threshold to disable
7206 * coalescing(smart fifb)-UTRESH=0
7208 wr32(E1000_DMCRTRH, 0);
7210 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7212 wr32(E1000_DMCTLX, reg);
7215 * free space in tx packet buffer to wake from
7216 * DMA coal
7218 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7219 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7222 * make low power state decision controlled
7223 * by DMA coal
7225 reg = rd32(E1000_PCIEMISC);
7226 reg &= ~E1000_PCIEMISC_LX_DECISION;
7227 wr32(E1000_PCIEMISC, reg);
7228 } /* endif adapter->dmac is not disabled */
7229 } else if (hw->mac.type == e1000_82580) {
7230 u32 reg = rd32(E1000_PCIEMISC);
7231 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7232 wr32(E1000_DMACR, 0);
7236 /* igb_main.c */