1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
49 #include <linux/tcp.h>
50 #include <linux/sctp.h>
51 #include <linux/if_ether.h>
52 #include <linux/aer.h>
53 #include <linux/prefetch.h>
55 #include <linux/dca.h>
62 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63 __stringify(BUILD) "-k"
64 char igb_driver_name
[] = "igb";
65 char igb_driver_version
[] = DRV_VERSION
;
66 static const char igb_driver_string
[] =
67 "Intel(R) Gigabit Ethernet Network Driver";
68 static const char igb_copyright
[] = "Copyright (c) 2007-2011 Intel Corporation.";
70 static const struct e1000_info
*igb_info_tbl
[] = {
71 [board_82575
] = &e1000_82575_info
,
74 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl
) = {
75 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_COPPER
), board_82575
},
76 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_FIBER
), board_82575
},
77 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_SERDES
), board_82575
},
78 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_SGMII
), board_82575
},
79 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_COPPER
), board_82575
},
80 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_FIBER
), board_82575
},
81 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_QUAD_FIBER
), board_82575
},
82 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_SERDES
), board_82575
},
83 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_SGMII
), board_82575
},
84 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_COPPER_DUAL
), board_82575
},
85 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SGMII
), board_82575
},
86 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SERDES
), board_82575
},
87 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_BACKPLANE
), board_82575
},
88 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SFP
), board_82575
},
89 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576
), board_82575
},
90 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_NS
), board_82575
},
91 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_NS_SERDES
), board_82575
},
92 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_FIBER
), board_82575
},
93 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_SERDES
), board_82575
},
94 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_SERDES_QUAD
), board_82575
},
95 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_QUAD_COPPER_ET2
), board_82575
},
96 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_QUAD_COPPER
), board_82575
},
97 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575EB_COPPER
), board_82575
},
98 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575EB_FIBER_SERDES
), board_82575
},
99 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575GB_QUAD_COPPER
), board_82575
},
100 /* required last entry */
104 MODULE_DEVICE_TABLE(pci
, igb_pci_tbl
);
106 void igb_reset(struct igb_adapter
*);
107 static int igb_setup_all_tx_resources(struct igb_adapter
*);
108 static int igb_setup_all_rx_resources(struct igb_adapter
*);
109 static void igb_free_all_tx_resources(struct igb_adapter
*);
110 static void igb_free_all_rx_resources(struct igb_adapter
*);
111 static void igb_setup_mrqc(struct igb_adapter
*);
112 static int igb_probe(struct pci_dev
*, const struct pci_device_id
*);
113 static void __devexit
igb_remove(struct pci_dev
*pdev
);
114 static void igb_init_hw_timer(struct igb_adapter
*adapter
);
115 static int igb_sw_init(struct igb_adapter
*);
116 static int igb_open(struct net_device
*);
117 static int igb_close(struct net_device
*);
118 static void igb_configure_tx(struct igb_adapter
*);
119 static void igb_configure_rx(struct igb_adapter
*);
120 static void igb_clean_all_tx_rings(struct igb_adapter
*);
121 static void igb_clean_all_rx_rings(struct igb_adapter
*);
122 static void igb_clean_tx_ring(struct igb_ring
*);
123 static void igb_clean_rx_ring(struct igb_ring
*);
124 static void igb_set_rx_mode(struct net_device
*);
125 static void igb_update_phy_info(unsigned long);
126 static void igb_watchdog(unsigned long);
127 static void igb_watchdog_task(struct work_struct
*);
128 static netdev_tx_t
igb_xmit_frame(struct sk_buff
*skb
, struct net_device
*);
129 static struct rtnl_link_stats64
*igb_get_stats64(struct net_device
*dev
,
130 struct rtnl_link_stats64
*stats
);
131 static int igb_change_mtu(struct net_device
*, int);
132 static int igb_set_mac(struct net_device
*, void *);
133 static void igb_set_uta(struct igb_adapter
*adapter
);
134 static irqreturn_t
igb_intr(int irq
, void *);
135 static irqreturn_t
igb_intr_msi(int irq
, void *);
136 static irqreturn_t
igb_msix_other(int irq
, void *);
137 static irqreturn_t
igb_msix_ring(int irq
, void *);
138 #ifdef CONFIG_IGB_DCA
139 static void igb_update_dca(struct igb_q_vector
*);
140 static void igb_setup_dca(struct igb_adapter
*);
141 #endif /* CONFIG_IGB_DCA */
142 static int igb_poll(struct napi_struct
*, int);
143 static bool igb_clean_tx_irq(struct igb_q_vector
*);
144 static bool igb_clean_rx_irq(struct igb_q_vector
*, int);
145 static int igb_ioctl(struct net_device
*, struct ifreq
*, int cmd
);
146 static void igb_tx_timeout(struct net_device
*);
147 static void igb_reset_task(struct work_struct
*);
148 static void igb_vlan_mode(struct net_device
*netdev
, u32 features
);
149 static void igb_vlan_rx_add_vid(struct net_device
*, u16
);
150 static void igb_vlan_rx_kill_vid(struct net_device
*, u16
);
151 static void igb_restore_vlan(struct igb_adapter
*);
152 static void igb_rar_set_qsel(struct igb_adapter
*, u8
*, u32
, u8
);
153 static void igb_ping_all_vfs(struct igb_adapter
*);
154 static void igb_msg_task(struct igb_adapter
*);
155 static void igb_vmm_control(struct igb_adapter
*);
156 static int igb_set_vf_mac(struct igb_adapter
*, int, unsigned char *);
157 static void igb_restore_vf_multicasts(struct igb_adapter
*adapter
);
158 static int igb_ndo_set_vf_mac(struct net_device
*netdev
, int vf
, u8
*mac
);
159 static int igb_ndo_set_vf_vlan(struct net_device
*netdev
,
160 int vf
, u16 vlan
, u8 qos
);
161 static int igb_ndo_set_vf_bw(struct net_device
*netdev
, int vf
, int tx_rate
);
162 static int igb_ndo_get_vf_config(struct net_device
*netdev
, int vf
,
163 struct ifla_vf_info
*ivi
);
164 static void igb_check_vf_rate_limit(struct igb_adapter
*);
166 #ifdef CONFIG_PCI_IOV
167 static int igb_vf_configure(struct igb_adapter
*adapter
, int vf
);
168 static int igb_find_enabled_vfs(struct igb_adapter
*adapter
);
169 static int igb_check_vf_assignment(struct igb_adapter
*adapter
);
173 static int igb_suspend(struct pci_dev
*, pm_message_t
);
174 static int igb_resume(struct pci_dev
*);
176 static void igb_shutdown(struct pci_dev
*);
177 #ifdef CONFIG_IGB_DCA
178 static int igb_notify_dca(struct notifier_block
*, unsigned long, void *);
179 static struct notifier_block dca_notifier
= {
180 .notifier_call
= igb_notify_dca
,
185 #ifdef CONFIG_NET_POLL_CONTROLLER
186 /* for netdump / net console */
187 static void igb_netpoll(struct net_device
*);
189 #ifdef CONFIG_PCI_IOV
190 static unsigned int max_vfs
= 0;
191 module_param(max_vfs
, uint
, 0);
192 MODULE_PARM_DESC(max_vfs
, "Maximum number of virtual functions to allocate "
193 "per physical function");
194 #endif /* CONFIG_PCI_IOV */
196 static pci_ers_result_t
igb_io_error_detected(struct pci_dev
*,
197 pci_channel_state_t
);
198 static pci_ers_result_t
igb_io_slot_reset(struct pci_dev
*);
199 static void igb_io_resume(struct pci_dev
*);
201 static struct pci_error_handlers igb_err_handler
= {
202 .error_detected
= igb_io_error_detected
,
203 .slot_reset
= igb_io_slot_reset
,
204 .resume
= igb_io_resume
,
207 static void igb_init_dmac(struct igb_adapter
*adapter
, u32 pba
);
209 static struct pci_driver igb_driver
= {
210 .name
= igb_driver_name
,
211 .id_table
= igb_pci_tbl
,
213 .remove
= __devexit_p(igb_remove
),
215 /* Power Management Hooks */
216 .suspend
= igb_suspend
,
217 .resume
= igb_resume
,
219 .shutdown
= igb_shutdown
,
220 .err_handler
= &igb_err_handler
223 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
224 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
225 MODULE_LICENSE("GPL");
226 MODULE_VERSION(DRV_VERSION
);
228 struct igb_reg_info
{
233 static const struct igb_reg_info igb_reg_info_tbl
[] = {
235 /* General Registers */
236 {E1000_CTRL
, "CTRL"},
237 {E1000_STATUS
, "STATUS"},
238 {E1000_CTRL_EXT
, "CTRL_EXT"},
240 /* Interrupt Registers */
244 {E1000_RCTL
, "RCTL"},
245 {E1000_RDLEN(0), "RDLEN"},
246 {E1000_RDH(0), "RDH"},
247 {E1000_RDT(0), "RDT"},
248 {E1000_RXDCTL(0), "RXDCTL"},
249 {E1000_RDBAL(0), "RDBAL"},
250 {E1000_RDBAH(0), "RDBAH"},
253 {E1000_TCTL
, "TCTL"},
254 {E1000_TDBAL(0), "TDBAL"},
255 {E1000_TDBAH(0), "TDBAH"},
256 {E1000_TDLEN(0), "TDLEN"},
257 {E1000_TDH(0), "TDH"},
258 {E1000_TDT(0), "TDT"},
259 {E1000_TXDCTL(0), "TXDCTL"},
260 {E1000_TDFH
, "TDFH"},
261 {E1000_TDFT
, "TDFT"},
262 {E1000_TDFHS
, "TDFHS"},
263 {E1000_TDFPC
, "TDFPC"},
265 /* List Terminator */
270 * igb_regdump - register printout routine
272 static void igb_regdump(struct e1000_hw
*hw
, struct igb_reg_info
*reginfo
)
278 switch (reginfo
->ofs
) {
280 for (n
= 0; n
< 4; n
++)
281 regs
[n
] = rd32(E1000_RDLEN(n
));
284 for (n
= 0; n
< 4; n
++)
285 regs
[n
] = rd32(E1000_RDH(n
));
288 for (n
= 0; n
< 4; n
++)
289 regs
[n
] = rd32(E1000_RDT(n
));
291 case E1000_RXDCTL(0):
292 for (n
= 0; n
< 4; n
++)
293 regs
[n
] = rd32(E1000_RXDCTL(n
));
296 for (n
= 0; n
< 4; n
++)
297 regs
[n
] = rd32(E1000_RDBAL(n
));
300 for (n
= 0; n
< 4; n
++)
301 regs
[n
] = rd32(E1000_RDBAH(n
));
304 for (n
= 0; n
< 4; n
++)
305 regs
[n
] = rd32(E1000_RDBAL(n
));
308 for (n
= 0; n
< 4; n
++)
309 regs
[n
] = rd32(E1000_TDBAH(n
));
312 for (n
= 0; n
< 4; n
++)
313 regs
[n
] = rd32(E1000_TDLEN(n
));
316 for (n
= 0; n
< 4; n
++)
317 regs
[n
] = rd32(E1000_TDH(n
));
320 for (n
= 0; n
< 4; n
++)
321 regs
[n
] = rd32(E1000_TDT(n
));
323 case E1000_TXDCTL(0):
324 for (n
= 0; n
< 4; n
++)
325 regs
[n
] = rd32(E1000_TXDCTL(n
));
328 printk(KERN_INFO
"%-15s %08x\n",
329 reginfo
->name
, rd32(reginfo
->ofs
));
333 snprintf(rname
, 16, "%s%s", reginfo
->name
, "[0-3]");
334 printk(KERN_INFO
"%-15s ", rname
);
335 for (n
= 0; n
< 4; n
++)
336 printk(KERN_CONT
"%08x ", regs
[n
]);
337 printk(KERN_CONT
"\n");
341 * igb_dump - Print registers, tx-rings and rx-rings
343 static void igb_dump(struct igb_adapter
*adapter
)
345 struct net_device
*netdev
= adapter
->netdev
;
346 struct e1000_hw
*hw
= &adapter
->hw
;
347 struct igb_reg_info
*reginfo
;
348 struct igb_ring
*tx_ring
;
349 union e1000_adv_tx_desc
*tx_desc
;
350 struct my_u0
{ u64 a
; u64 b
; } *u0
;
351 struct igb_ring
*rx_ring
;
352 union e1000_adv_rx_desc
*rx_desc
;
356 if (!netif_msg_hw(adapter
))
359 /* Print netdevice Info */
361 dev_info(&adapter
->pdev
->dev
, "Net device Info\n");
362 printk(KERN_INFO
"Device Name state "
363 "trans_start last_rx\n");
364 printk(KERN_INFO
"%-15s %016lX %016lX %016lX\n",
371 /* Print Registers */
372 dev_info(&adapter
->pdev
->dev
, "Register Dump\n");
373 printk(KERN_INFO
" Register Name Value\n");
374 for (reginfo
= (struct igb_reg_info
*)igb_reg_info_tbl
;
375 reginfo
->name
; reginfo
++) {
376 igb_regdump(hw
, reginfo
);
379 /* Print TX Ring Summary */
380 if (!netdev
|| !netif_running(netdev
))
383 dev_info(&adapter
->pdev
->dev
, "TX Rings Summary\n");
384 printk(KERN_INFO
"Queue [NTU] [NTC] [bi(ntc)->dma ]"
385 " leng ntw timestamp\n");
386 for (n
= 0; n
< adapter
->num_tx_queues
; n
++) {
387 struct igb_tx_buffer
*buffer_info
;
388 tx_ring
= adapter
->tx_ring
[n
];
389 buffer_info
= &tx_ring
->tx_buffer_info
[tx_ring
->next_to_clean
];
390 printk(KERN_INFO
" %5d %5X %5X %016llX %04X %p %016llX\n",
391 n
, tx_ring
->next_to_use
, tx_ring
->next_to_clean
,
392 (u64
)buffer_info
->dma
,
394 buffer_info
->next_to_watch
,
395 (u64
)buffer_info
->time_stamp
);
399 if (!netif_msg_tx_done(adapter
))
400 goto rx_ring_summary
;
402 dev_info(&adapter
->pdev
->dev
, "TX Rings Dump\n");
404 /* Transmit Descriptor Formats
406 * Advanced Transmit Descriptor
407 * +--------------------------------------------------------------+
408 * 0 | Buffer Address [63:0] |
409 * +--------------------------------------------------------------+
410 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
411 * +--------------------------------------------------------------+
412 * 63 46 45 40 39 38 36 35 32 31 24 15 0
415 for (n
= 0; n
< adapter
->num_tx_queues
; n
++) {
416 tx_ring
= adapter
->tx_ring
[n
];
417 printk(KERN_INFO
"------------------------------------\n");
418 printk(KERN_INFO
"TX QUEUE INDEX = %d\n", tx_ring
->queue_index
);
419 printk(KERN_INFO
"------------------------------------\n");
420 printk(KERN_INFO
"T [desc] [address 63:0 ] "
421 "[PlPOCIStDDM Ln] [bi->dma ] "
422 "leng ntw timestamp bi->skb\n");
424 for (i
= 0; tx_ring
->desc
&& (i
< tx_ring
->count
); i
++) {
425 struct igb_tx_buffer
*buffer_info
;
426 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
427 buffer_info
= &tx_ring
->tx_buffer_info
[i
];
428 u0
= (struct my_u0
*)tx_desc
;
429 printk(KERN_INFO
"T [0x%03X] %016llX %016llX %016llX"
430 " %04X %p %016llX %p", i
,
433 (u64
)buffer_info
->dma
,
435 buffer_info
->next_to_watch
,
436 (u64
)buffer_info
->time_stamp
,
438 if (i
== tx_ring
->next_to_use
&&
439 i
== tx_ring
->next_to_clean
)
440 printk(KERN_CONT
" NTC/U\n");
441 else if (i
== tx_ring
->next_to_use
)
442 printk(KERN_CONT
" NTU\n");
443 else if (i
== tx_ring
->next_to_clean
)
444 printk(KERN_CONT
" NTC\n");
446 printk(KERN_CONT
"\n");
448 if (netif_msg_pktdata(adapter
) && buffer_info
->dma
!= 0)
449 print_hex_dump(KERN_INFO
, "",
451 16, 1, phys_to_virt(buffer_info
->dma
),
452 buffer_info
->length
, true);
456 /* Print RX Rings Summary */
458 dev_info(&adapter
->pdev
->dev
, "RX Rings Summary\n");
459 printk(KERN_INFO
"Queue [NTU] [NTC]\n");
460 for (n
= 0; n
< adapter
->num_rx_queues
; n
++) {
461 rx_ring
= adapter
->rx_ring
[n
];
462 printk(KERN_INFO
" %5d %5X %5X\n", n
,
463 rx_ring
->next_to_use
, rx_ring
->next_to_clean
);
467 if (!netif_msg_rx_status(adapter
))
470 dev_info(&adapter
->pdev
->dev
, "RX Rings Dump\n");
472 /* Advanced Receive Descriptor (Read) Format
474 * +-----------------------------------------------------+
475 * 0 | Packet Buffer Address [63:1] |A0/NSE|
476 * +----------------------------------------------+------+
477 * 8 | Header Buffer Address [63:1] | DD |
478 * +-----------------------------------------------------+
481 * Advanced Receive Descriptor (Write-Back) Format
483 * 63 48 47 32 31 30 21 20 17 16 4 3 0
484 * +------------------------------------------------------+
485 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
486 * | Checksum Ident | | | | Type | Type |
487 * +------------------------------------------------------+
488 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
489 * +------------------------------------------------------+
490 * 63 48 47 32 31 20 19 0
493 for (n
= 0; n
< adapter
->num_rx_queues
; n
++) {
494 rx_ring
= adapter
->rx_ring
[n
];
495 printk(KERN_INFO
"------------------------------------\n");
496 printk(KERN_INFO
"RX QUEUE INDEX = %d\n", rx_ring
->queue_index
);
497 printk(KERN_INFO
"------------------------------------\n");
498 printk(KERN_INFO
"R [desc] [ PktBuf A0] "
499 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
500 "<-- Adv Rx Read format\n");
501 printk(KERN_INFO
"RWB[desc] [PcsmIpSHl PtRs] "
502 "[vl er S cks ln] ---------------- [bi->skb] "
503 "<-- Adv Rx Write-Back format\n");
505 for (i
= 0; i
< rx_ring
->count
; i
++) {
506 struct igb_rx_buffer
*buffer_info
;
507 buffer_info
= &rx_ring
->rx_buffer_info
[i
];
508 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
509 u0
= (struct my_u0
*)rx_desc
;
510 staterr
= le32_to_cpu(rx_desc
->wb
.upper
.status_error
);
511 if (staterr
& E1000_RXD_STAT_DD
) {
512 /* Descriptor Done */
513 printk(KERN_INFO
"RWB[0x%03X] %016llX "
514 "%016llX ---------------- %p", i
,
519 printk(KERN_INFO
"R [0x%03X] %016llX "
520 "%016llX %016llX %p", i
,
523 (u64
)buffer_info
->dma
,
526 if (netif_msg_pktdata(adapter
)) {
527 print_hex_dump(KERN_INFO
, "",
530 phys_to_virt(buffer_info
->dma
),
531 IGB_RX_HDR_LEN
, true);
532 print_hex_dump(KERN_INFO
, "",
536 buffer_info
->page_dma
+
537 buffer_info
->page_offset
),
542 if (i
== rx_ring
->next_to_use
)
543 printk(KERN_CONT
" NTU\n");
544 else if (i
== rx_ring
->next_to_clean
)
545 printk(KERN_CONT
" NTC\n");
547 printk(KERN_CONT
"\n");
558 * igb_read_clock - read raw cycle counter (to be used by time counter)
560 static cycle_t
igb_read_clock(const struct cyclecounter
*tc
)
562 struct igb_adapter
*adapter
=
563 container_of(tc
, struct igb_adapter
, cycles
);
564 struct e1000_hw
*hw
= &adapter
->hw
;
569 * The timestamp latches on lowest register read. For the 82580
570 * the lowest register is SYSTIMR instead of SYSTIML. However we never
571 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
573 if (hw
->mac
.type
>= e1000_82580
) {
574 stamp
= rd32(E1000_SYSTIMR
) >> 8;
575 shift
= IGB_82580_TSYNC_SHIFT
;
578 stamp
|= (u64
)rd32(E1000_SYSTIML
) << shift
;
579 stamp
|= (u64
)rd32(E1000_SYSTIMH
) << (shift
+ 32);
584 * igb_get_hw_dev - return device
585 * used by hardware layer to print debugging information
587 struct net_device
*igb_get_hw_dev(struct e1000_hw
*hw
)
589 struct igb_adapter
*adapter
= hw
->back
;
590 return adapter
->netdev
;
594 * igb_init_module - Driver Registration Routine
596 * igb_init_module is the first routine called when the driver is
597 * loaded. All it does is register with the PCI subsystem.
599 static int __init
igb_init_module(void)
602 printk(KERN_INFO
"%s - version %s\n",
603 igb_driver_string
, igb_driver_version
);
605 printk(KERN_INFO
"%s\n", igb_copyright
);
607 #ifdef CONFIG_IGB_DCA
608 dca_register_notify(&dca_notifier
);
610 ret
= pci_register_driver(&igb_driver
);
614 module_init(igb_init_module
);
617 * igb_exit_module - Driver Exit Cleanup Routine
619 * igb_exit_module is called just before the driver is removed
622 static void __exit
igb_exit_module(void)
624 #ifdef CONFIG_IGB_DCA
625 dca_unregister_notify(&dca_notifier
);
627 pci_unregister_driver(&igb_driver
);
630 module_exit(igb_exit_module
);
/*
 * Map a linear queue number to its 82576 index: bit 0 of i is moved up to
 * bit 3 and the remaining bits shift down by one (0->0, 1->8, 2->1, 3->9).
 * The argument is parenthesized so that expressions such as
 * Q_IDX_82576(a + b) expand correctly. Note that i is evaluated twice, so
 * it must not have side effects.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
634 * igb_cache_ring_register - Descriptor ring to register mapping
635 * @adapter: board private structure to initialize
637 * Once we know the feature-set enabled for the device, we'll cache
638 * the register offset the descriptor ring is assigned to.
640 static void igb_cache_ring_register(struct igb_adapter
*adapter
)
643 u32 rbase_offset
= adapter
->vfs_allocated_count
;
645 switch (adapter
->hw
.mac
.type
) {
647 /* The queues are allocated for virtualization such that VF 0
648 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
649 * In order to avoid collision we start at the first free queue
650 * and continue consuming queues in the same sequence
652 if (adapter
->vfs_allocated_count
) {
653 for (; i
< adapter
->rss_queues
; i
++)
654 adapter
->rx_ring
[i
]->reg_idx
= rbase_offset
+
661 for (; i
< adapter
->num_rx_queues
; i
++)
662 adapter
->rx_ring
[i
]->reg_idx
= rbase_offset
+ i
;
663 for (; j
< adapter
->num_tx_queues
; j
++)
664 adapter
->tx_ring
[j
]->reg_idx
= rbase_offset
+ j
;
669 static void igb_free_queues(struct igb_adapter
*adapter
)
673 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
674 kfree(adapter
->tx_ring
[i
]);
675 adapter
->tx_ring
[i
] = NULL
;
677 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
678 kfree(adapter
->rx_ring
[i
]);
679 adapter
->rx_ring
[i
] = NULL
;
681 adapter
->num_rx_queues
= 0;
682 adapter
->num_tx_queues
= 0;
686 * igb_alloc_queues - Allocate memory for all rings
687 * @adapter: board private structure to initialize
689 * We allocate one ring per queue at run-time since we don't know the
690 * number of queues at compile-time.
692 static int igb_alloc_queues(struct igb_adapter
*adapter
)
694 struct igb_ring
*ring
;
696 int orig_node
= adapter
->node
;
698 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
699 if (orig_node
== -1) {
700 int cur_node
= next_online_node(adapter
->node
);
701 if (cur_node
== MAX_NUMNODES
)
702 cur_node
= first_online_node
;
703 adapter
->node
= cur_node
;
705 ring
= kzalloc_node(sizeof(struct igb_ring
), GFP_KERNEL
,
708 ring
= kzalloc(sizeof(struct igb_ring
), GFP_KERNEL
);
711 ring
->count
= adapter
->tx_ring_count
;
712 ring
->queue_index
= i
;
713 ring
->dev
= &adapter
->pdev
->dev
;
714 ring
->netdev
= adapter
->netdev
;
715 ring
->numa_node
= adapter
->node
;
716 /* For 82575, context index must be unique per ring. */
717 if (adapter
->hw
.mac
.type
== e1000_82575
)
718 set_bit(IGB_RING_FLAG_TX_CTX_IDX
, &ring
->flags
);
719 adapter
->tx_ring
[i
] = ring
;
721 /* Restore the adapter's original node */
722 adapter
->node
= orig_node
;
724 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
725 if (orig_node
== -1) {
726 int cur_node
= next_online_node(adapter
->node
);
727 if (cur_node
== MAX_NUMNODES
)
728 cur_node
= first_online_node
;
729 adapter
->node
= cur_node
;
731 ring
= kzalloc_node(sizeof(struct igb_ring
), GFP_KERNEL
,
734 ring
= kzalloc(sizeof(struct igb_ring
), GFP_KERNEL
);
737 ring
->count
= adapter
->rx_ring_count
;
738 ring
->queue_index
= i
;
739 ring
->dev
= &adapter
->pdev
->dev
;
740 ring
->netdev
= adapter
->netdev
;
741 ring
->numa_node
= adapter
->node
;
742 /* set flag indicating ring supports SCTP checksum offload */
743 if (adapter
->hw
.mac
.type
>= e1000_82576
)
744 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM
, &ring
->flags
);
746 /* On i350, loopback VLAN packets have the tag byte-swapped. */
747 if (adapter
->hw
.mac
.type
== e1000_i350
)
748 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP
, &ring
->flags
);
750 adapter
->rx_ring
[i
] = ring
;
752 /* Restore the adapter's original node */
753 adapter
->node
= orig_node
;
755 igb_cache_ring_register(adapter
);
760 /* Restore the adapter's original node */
761 adapter
->node
= orig_node
;
762 igb_free_queues(adapter
);
768 * igb_write_ivar - configure ivar for given MSI-X vector
769 * @hw: pointer to the HW structure
770 * @msix_vector: vector number we are allocating to a given ring
771 * @index: row index of IVAR register to write within IVAR table
772 * @offset: column offset of in IVAR, should be multiple of 8
774 * This function is intended to handle the writing of the IVAR register
775 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
776 * each containing an cause allocation for an Rx and Tx ring, and a
777 * variable number of rows depending on the number of queues supported.
779 static void igb_write_ivar(struct e1000_hw
*hw
, int msix_vector
,
780 int index
, int offset
)
782 u32 ivar
= array_rd32(E1000_IVAR0
, index
);
784 /* clear any bits that are currently set */
785 ivar
&= ~((u32
)0xFF << offset
);
787 /* write vector and valid bit */
788 ivar
|= (msix_vector
| E1000_IVAR_VALID
) << offset
;
790 array_wr32(E1000_IVAR0
, index
, ivar
);
793 #define IGB_N0_QUEUE -1
794 static void igb_assign_vector(struct igb_q_vector
*q_vector
, int msix_vector
)
796 struct igb_adapter
*adapter
= q_vector
->adapter
;
797 struct e1000_hw
*hw
= &adapter
->hw
;
798 int rx_queue
= IGB_N0_QUEUE
;
799 int tx_queue
= IGB_N0_QUEUE
;
802 if (q_vector
->rx
.ring
)
803 rx_queue
= q_vector
->rx
.ring
->reg_idx
;
804 if (q_vector
->tx
.ring
)
805 tx_queue
= q_vector
->tx
.ring
->reg_idx
;
807 switch (hw
->mac
.type
) {
809 /* The 82575 assigns vectors using a bitmask, which matches the
810 bitmask for the EICR/EIMS/EIMC registers. To assign one
811 or more queues to a vector, we write the appropriate bits
812 into the MSIXBM register for that vector. */
813 if (rx_queue
> IGB_N0_QUEUE
)
814 msixbm
= E1000_EICR_RX_QUEUE0
<< rx_queue
;
815 if (tx_queue
> IGB_N0_QUEUE
)
816 msixbm
|= E1000_EICR_TX_QUEUE0
<< tx_queue
;
817 if (!adapter
->msix_entries
&& msix_vector
== 0)
818 msixbm
|= E1000_EIMS_OTHER
;
819 array_wr32(E1000_MSIXBM(0), msix_vector
, msixbm
);
820 q_vector
->eims_value
= msixbm
;
824 * 82576 uses a table that essentially consists of 2 columns
825 * with 8 rows. The ordering is column-major so we use the
826 * lower 3 bits as the row index, and the 4th bit as the
829 if (rx_queue
> IGB_N0_QUEUE
)
830 igb_write_ivar(hw
, msix_vector
,
832 (rx_queue
& 0x8) << 1);
833 if (tx_queue
> IGB_N0_QUEUE
)
834 igb_write_ivar(hw
, msix_vector
,
836 ((tx_queue
& 0x8) << 1) + 8);
837 q_vector
->eims_value
= 1 << msix_vector
;
842 * On 82580 and newer adapters the scheme is similar to 82576
843 * however instead of ordering column-major we have things
844 * ordered row-major. So we traverse the table by using
845 * bit 0 as the column offset, and the remaining bits as the
848 if (rx_queue
> IGB_N0_QUEUE
)
849 igb_write_ivar(hw
, msix_vector
,
851 (rx_queue
& 0x1) << 4);
852 if (tx_queue
> IGB_N0_QUEUE
)
853 igb_write_ivar(hw
, msix_vector
,
855 ((tx_queue
& 0x1) << 4) + 8);
856 q_vector
->eims_value
= 1 << msix_vector
;
863 /* add q_vector eims value to global eims_enable_mask */
864 adapter
->eims_enable_mask
|= q_vector
->eims_value
;
866 /* configure q_vector to set itr on first interrupt */
867 q_vector
->set_itr
= 1;
871 * igb_configure_msix - Configure MSI-X hardware
873 * igb_configure_msix sets up the hardware to properly
874 * generate MSI-X interrupts.
876 static void igb_configure_msix(struct igb_adapter
*adapter
)
880 struct e1000_hw
*hw
= &adapter
->hw
;
882 adapter
->eims_enable_mask
= 0;
884 /* set vector for other causes, i.e. link changes */
885 switch (hw
->mac
.type
) {
887 tmp
= rd32(E1000_CTRL_EXT
);
888 /* enable MSI-X PBA support*/
889 tmp
|= E1000_CTRL_EXT_PBA_CLR
;
891 /* Auto-Mask interrupts upon ICR read. */
892 tmp
|= E1000_CTRL_EXT_EIAME
;
893 tmp
|= E1000_CTRL_EXT_IRCA
;
895 wr32(E1000_CTRL_EXT
, tmp
);
897 /* enable msix_other interrupt */
898 array_wr32(E1000_MSIXBM(0), vector
++,
900 adapter
->eims_other
= E1000_EIMS_OTHER
;
907 /* Turn on MSI-X capability first, or our settings
908 * won't stick. And it will take days to debug. */
909 wr32(E1000_GPIE
, E1000_GPIE_MSIX_MODE
|
910 E1000_GPIE_PBA
| E1000_GPIE_EIAME
|
913 /* enable msix_other interrupt */
914 adapter
->eims_other
= 1 << vector
;
915 tmp
= (vector
++ | E1000_IVAR_VALID
) << 8;
917 wr32(E1000_IVAR_MISC
, tmp
);
920 /* do nothing, since nothing else supports MSI-X */
922 } /* switch (hw->mac.type) */
924 adapter
->eims_enable_mask
|= adapter
->eims_other
;
926 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
927 igb_assign_vector(adapter
->q_vector
[i
], vector
++);
933 * igb_request_msix - Initialize MSI-X interrupts
935 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
938 static int igb_request_msix(struct igb_adapter
*adapter
)
940 struct net_device
*netdev
= adapter
->netdev
;
941 struct e1000_hw
*hw
= &adapter
->hw
;
942 int i
, err
= 0, vector
= 0;
944 err
= request_irq(adapter
->msix_entries
[vector
].vector
,
945 igb_msix_other
, 0, netdev
->name
, adapter
);
950 for (i
= 0; i
< adapter
->num_q_vectors
; i
++) {
951 struct igb_q_vector
*q_vector
= adapter
->q_vector
[i
];
953 q_vector
->itr_register
= hw
->hw_addr
+ E1000_EITR(vector
);
955 if (q_vector
->rx
.ring
&& q_vector
->tx
.ring
)
956 sprintf(q_vector
->name
, "%s-TxRx-%u", netdev
->name
,
957 q_vector
->rx
.ring
->queue_index
);
958 else if (q_vector
->tx
.ring
)
959 sprintf(q_vector
->name
, "%s-tx-%u", netdev
->name
,
960 q_vector
->tx
.ring
->queue_index
);
961 else if (q_vector
->rx
.ring
)
962 sprintf(q_vector
->name
, "%s-rx-%u", netdev
->name
,
963 q_vector
->rx
.ring
->queue_index
);
965 sprintf(q_vector
->name
, "%s-unused", netdev
->name
);
967 err
= request_irq(adapter
->msix_entries
[vector
].vector
,
968 igb_msix_ring
, 0, q_vector
->name
,
975 igb_configure_msix(adapter
);
981 static void igb_reset_interrupt_capability(struct igb_adapter
*adapter
)
983 if (adapter
->msix_entries
) {
984 pci_disable_msix(adapter
->pdev
);
985 kfree(adapter
->msix_entries
);
986 adapter
->msix_entries
= NULL
;
987 } else if (adapter
->flags
& IGB_FLAG_HAS_MSI
) {
988 pci_disable_msi(adapter
->pdev
);
993 * igb_free_q_vectors - Free memory allocated for interrupt vectors
994 * @adapter: board private structure to initialize
996 * This function frees the memory allocated to the q_vectors. In addition if
997 * NAPI is enabled it will delete any references to the NAPI struct prior
998 * to freeing the q_vector.
1000 static void igb_free_q_vectors(struct igb_adapter
*adapter
)
1004 for (v_idx
= 0; v_idx
< adapter
->num_q_vectors
; v_idx
++) {
1005 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1006 adapter
->q_vector
[v_idx
] = NULL
;
1009 netif_napi_del(&q_vector
->napi
);
1012 adapter
->num_q_vectors
= 0;
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Tears down, in order, the ring structures, the q_vectors and the
 * MSI-X/MSI enablement, leaving the adapter with 0 rx queues, 0 tx queues
 * and no MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings go first, then the vectors that referenced them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* finally hand the interrupt resources back to the PCI layer */
	igb_reset_interrupt_capability(adapter);
}
1029 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1031 * Attempt to configure interrupts using the best available
1032 * capabilities of the hardware and kernel.
1034 static int igb_set_interrupt_capability(struct igb_adapter
*adapter
)
1039 /* Number of supported queues. */
1040 adapter
->num_rx_queues
= adapter
->rss_queues
;
1041 if (adapter
->vfs_allocated_count
)
1042 adapter
->num_tx_queues
= 1;
1044 adapter
->num_tx_queues
= adapter
->rss_queues
;
1046 /* start with one vector for every rx queue */
1047 numvecs
= adapter
->num_rx_queues
;
1049 /* if tx handler is separate add 1 for every tx queue */
1050 if (!(adapter
->flags
& IGB_FLAG_QUEUE_PAIRS
))
1051 numvecs
+= adapter
->num_tx_queues
;
1053 /* store the number of vectors reserved for queues */
1054 adapter
->num_q_vectors
= numvecs
;
1056 /* add 1 vector for link status interrupts */
1058 adapter
->msix_entries
= kcalloc(numvecs
, sizeof(struct msix_entry
),
1060 if (!adapter
->msix_entries
)
1063 for (i
= 0; i
< numvecs
; i
++)
1064 adapter
->msix_entries
[i
].entry
= i
;
1066 err
= pci_enable_msix(adapter
->pdev
,
1067 adapter
->msix_entries
,
1072 igb_reset_interrupt_capability(adapter
);
1074 /* If we can't do MSI-X, try MSI */
1076 #ifdef CONFIG_PCI_IOV
1077 /* disable SR-IOV for non MSI-X configurations */
1078 if (adapter
->vf_data
) {
1079 struct e1000_hw
*hw
= &adapter
->hw
;
1080 /* disable iov and allow time for transactions to clear */
1081 pci_disable_sriov(adapter
->pdev
);
1084 kfree(adapter
->vf_data
);
1085 adapter
->vf_data
= NULL
;
1086 wr32(E1000_IOVCTL
, E1000_IOVCTL_REUSE_VFQ
);
1089 dev_info(&adapter
->pdev
->dev
, "IOV Disabled\n");
1092 adapter
->vfs_allocated_count
= 0;
1093 adapter
->rss_queues
= 1;
1094 adapter
->flags
|= IGB_FLAG_QUEUE_PAIRS
;
1095 adapter
->num_rx_queues
= 1;
1096 adapter
->num_tx_queues
= 1;
1097 adapter
->num_q_vectors
= 1;
1098 if (!pci_enable_msi(adapter
->pdev
))
1099 adapter
->flags
|= IGB_FLAG_HAS_MSI
;
1101 /* Notify the stack of the (possibly) reduced queue counts. */
1102 netif_set_real_num_tx_queues(adapter
->netdev
, adapter
->num_tx_queues
);
1103 return netif_set_real_num_rx_queues(adapter
->netdev
,
1104 adapter
->num_rx_queues
);
1108 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1109 * @adapter: board private structure to initialize
1111 * We allocate one q_vector per queue interrupt. If allocation fails we
1114 static int igb_alloc_q_vectors(struct igb_adapter
*adapter
)
1116 struct igb_q_vector
*q_vector
;
1117 struct e1000_hw
*hw
= &adapter
->hw
;
1119 int orig_node
= adapter
->node
;
1121 for (v_idx
= 0; v_idx
< adapter
->num_q_vectors
; v_idx
++) {
1122 if ((adapter
->num_q_vectors
== (adapter
->num_rx_queues
+
1123 adapter
->num_tx_queues
)) &&
1124 (adapter
->num_rx_queues
== v_idx
))
1125 adapter
->node
= orig_node
;
1126 if (orig_node
== -1) {
1127 int cur_node
= next_online_node(adapter
->node
);
1128 if (cur_node
== MAX_NUMNODES
)
1129 cur_node
= first_online_node
;
1130 adapter
->node
= cur_node
;
1132 q_vector
= kzalloc_node(sizeof(struct igb_q_vector
), GFP_KERNEL
,
1135 q_vector
= kzalloc(sizeof(struct igb_q_vector
),
1139 q_vector
->adapter
= adapter
;
1140 q_vector
->itr_register
= hw
->hw_addr
+ E1000_EITR(0);
1141 q_vector
->itr_val
= IGB_START_ITR
;
1142 netif_napi_add(adapter
->netdev
, &q_vector
->napi
, igb_poll
, 64);
1143 adapter
->q_vector
[v_idx
] = q_vector
;
1145 /* Restore the adapter's original node */
1146 adapter
->node
= orig_node
;
1151 /* Restore the adapter's original node */
1152 adapter
->node
= orig_node
;
1153 igb_free_q_vectors(adapter
);
1157 static void igb_map_rx_ring_to_vector(struct igb_adapter
*adapter
,
1158 int ring_idx
, int v_idx
)
1160 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1162 q_vector
->rx
.ring
= adapter
->rx_ring
[ring_idx
];
1163 q_vector
->rx
.ring
->q_vector
= q_vector
;
1164 q_vector
->rx
.count
++;
1165 q_vector
->itr_val
= adapter
->rx_itr_setting
;
1166 if (q_vector
->itr_val
&& q_vector
->itr_val
<= 3)
1167 q_vector
->itr_val
= IGB_START_ITR
;
1170 static void igb_map_tx_ring_to_vector(struct igb_adapter
*adapter
,
1171 int ring_idx
, int v_idx
)
1173 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1175 q_vector
->tx
.ring
= adapter
->tx_ring
[ring_idx
];
1176 q_vector
->tx
.ring
->q_vector
= q_vector
;
1177 q_vector
->tx
.count
++;
1178 q_vector
->itr_val
= adapter
->tx_itr_setting
;
1179 q_vector
->tx
.work_limit
= adapter
->tx_work_limit
;
1180 if (q_vector
->itr_val
&& q_vector
->itr_val
<= 3)
1181 q_vector
->itr_val
= IGB_START_ITR
;
1185 * igb_map_ring_to_vector - maps allocated queues to vectors
1187 * This function maps the recently allocated queues to vectors.
1189 static int igb_map_ring_to_vector(struct igb_adapter
*adapter
)
1194 if ((adapter
->num_q_vectors
< adapter
->num_rx_queues
) ||
1195 (adapter
->num_q_vectors
< adapter
->num_tx_queues
))
1198 if (adapter
->num_q_vectors
>=
1199 (adapter
->num_rx_queues
+ adapter
->num_tx_queues
)) {
1200 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
1201 igb_map_rx_ring_to_vector(adapter
, i
, v_idx
++);
1202 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
1203 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
++);
1205 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1206 if (i
< adapter
->num_tx_queues
)
1207 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
);
1208 igb_map_rx_ring_to_vector(adapter
, i
, v_idx
++);
1210 for (; i
< adapter
->num_tx_queues
; i
++)
1211 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
++);
1217 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1219 * This function initializes the interrupts and allocates all of the queues.
1221 static int igb_init_interrupt_scheme(struct igb_adapter
*adapter
)
1223 struct pci_dev
*pdev
= adapter
->pdev
;
1226 err
= igb_set_interrupt_capability(adapter
);
1230 err
= igb_alloc_q_vectors(adapter
);
1232 dev_err(&pdev
->dev
, "Unable to allocate memory for vectors\n");
1233 goto err_alloc_q_vectors
;
1236 err
= igb_alloc_queues(adapter
);
1238 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
1239 goto err_alloc_queues
;
1242 err
= igb_map_ring_to_vector(adapter
);
1244 dev_err(&pdev
->dev
, "Invalid q_vector to ring mapping\n");
1245 goto err_map_queues
;
1251 igb_free_queues(adapter
);
1253 igb_free_q_vectors(adapter
);
1254 err_alloc_q_vectors
:
1255 igb_reset_interrupt_capability(adapter
);
1260 * igb_request_irq - initialize interrupts
1262 * Attempts to configure interrupts using the best available
1263 * capabilities of the hardware and kernel.
1265 static int igb_request_irq(struct igb_adapter
*adapter
)
1267 struct net_device
*netdev
= adapter
->netdev
;
1268 struct pci_dev
*pdev
= adapter
->pdev
;
1271 if (adapter
->msix_entries
) {
1272 err
= igb_request_msix(adapter
);
1275 /* fall back to MSI */
1276 igb_clear_interrupt_scheme(adapter
);
1277 if (!pci_enable_msi(pdev
))
1278 adapter
->flags
|= IGB_FLAG_HAS_MSI
;
1279 igb_free_all_tx_resources(adapter
);
1280 igb_free_all_rx_resources(adapter
);
1281 adapter
->num_tx_queues
= 1;
1282 adapter
->num_rx_queues
= 1;
1283 adapter
->num_q_vectors
= 1;
1284 err
= igb_alloc_q_vectors(adapter
);
1287 "Unable to allocate memory for vectors\n");
1290 err
= igb_alloc_queues(adapter
);
1293 "Unable to allocate memory for queues\n");
1294 igb_free_q_vectors(adapter
);
1297 igb_setup_all_tx_resources(adapter
);
1298 igb_setup_all_rx_resources(adapter
);
1301 igb_assign_vector(adapter
->q_vector
[0], 0);
1303 if (adapter
->flags
& IGB_FLAG_HAS_MSI
) {
1304 err
= request_irq(pdev
->irq
, igb_intr_msi
, 0,
1305 netdev
->name
, adapter
);
1309 /* fall back to legacy interrupts */
1310 igb_reset_interrupt_capability(adapter
);
1311 adapter
->flags
&= ~IGB_FLAG_HAS_MSI
;
1314 err
= request_irq(pdev
->irq
, igb_intr
, IRQF_SHARED
,
1315 netdev
->name
, adapter
);
1318 dev_err(&pdev
->dev
, "Error %d getting interrupt\n",
1325 static void igb_free_irq(struct igb_adapter
*adapter
)
1327 if (adapter
->msix_entries
) {
1330 free_irq(adapter
->msix_entries
[vector
++].vector
, adapter
);
1332 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1333 free_irq(adapter
->msix_entries
[vector
++].vector
,
1334 adapter
->q_vector
[i
]);
1336 free_irq(adapter
->pdev
->irq
, adapter
);
1341 * igb_irq_disable - Mask off interrupt generation on the NIC
1342 * @adapter: board private structure
1344 static void igb_irq_disable(struct igb_adapter
*adapter
)
1346 struct e1000_hw
*hw
= &adapter
->hw
;
1349 * we need to be careful when disabling interrupts. The VFs are also
1350 * mapped into these registers and so clearing the bits can cause
1351 * issues on the VF drivers so we only need to clear what we set
1353 if (adapter
->msix_entries
) {
1354 u32 regval
= rd32(E1000_EIAM
);
1355 wr32(E1000_EIAM
, regval
& ~adapter
->eims_enable_mask
);
1356 wr32(E1000_EIMC
, adapter
->eims_enable_mask
);
1357 regval
= rd32(E1000_EIAC
);
1358 wr32(E1000_EIAC
, regval
& ~adapter
->eims_enable_mask
);
1362 wr32(E1000_IMC
, ~0);
1364 if (adapter
->msix_entries
) {
1366 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1367 synchronize_irq(adapter
->msix_entries
[i
].vector
);
1369 synchronize_irq(adapter
->pdev
->irq
);
1374 * igb_irq_enable - Enable default interrupt generation settings
1375 * @adapter: board private structure
1377 static void igb_irq_enable(struct igb_adapter
*adapter
)
1379 struct e1000_hw
*hw
= &adapter
->hw
;
1381 if (adapter
->msix_entries
) {
1382 u32 ims
= E1000_IMS_LSC
| E1000_IMS_DOUTSYNC
| E1000_IMS_DRSTA
;
1383 u32 regval
= rd32(E1000_EIAC
);
1384 wr32(E1000_EIAC
, regval
| adapter
->eims_enable_mask
);
1385 regval
= rd32(E1000_EIAM
);
1386 wr32(E1000_EIAM
, regval
| adapter
->eims_enable_mask
);
1387 wr32(E1000_EIMS
, adapter
->eims_enable_mask
);
1388 if (adapter
->vfs_allocated_count
) {
1389 wr32(E1000_MBVFIMR
, 0xFF);
1390 ims
|= E1000_IMS_VMMB
;
1392 wr32(E1000_IMS
, ims
);
1394 wr32(E1000_IMS
, IMS_ENABLE_MASK
|
1396 wr32(E1000_IAM
, IMS_ENABLE_MASK
|
1401 static void igb_update_mng_vlan(struct igb_adapter
*adapter
)
1403 struct e1000_hw
*hw
= &adapter
->hw
;
1404 u16 vid
= adapter
->hw
.mng_cookie
.vlan_id
;
1405 u16 old_vid
= adapter
->mng_vlan_id
;
1407 if (hw
->mng_cookie
.status
& E1000_MNG_DHCP_COOKIE_STATUS_VLAN
) {
1408 /* add VID to filter table */
1409 igb_vfta_set(hw
, vid
, true);
1410 adapter
->mng_vlan_id
= vid
;
1412 adapter
->mng_vlan_id
= IGB_MNG_VLAN_NONE
;
1415 if ((old_vid
!= (u16
)IGB_MNG_VLAN_NONE
) &&
1417 !test_bit(old_vid
, adapter
->active_vlans
)) {
1418 /* remove VID from filter table */
1419 igb_vfta_set(hw
, old_vid
, false);
1424 * igb_release_hw_control - release control of the h/w to f/w
1425 * @adapter: address of board private structure
1427 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1428 * For ASF and Pass Through versions of f/w this means that the
1429 * driver is no longer loaded.
1432 static void igb_release_hw_control(struct igb_adapter
*adapter
)
1434 struct e1000_hw
*hw
= &adapter
->hw
;
1437 /* Let firmware take over control of h/w */
1438 ctrl_ext
= rd32(E1000_CTRL_EXT
);
1439 wr32(E1000_CTRL_EXT
,
1440 ctrl_ext
& ~E1000_CTRL_EXT_DRV_LOAD
);
1444 * igb_get_hw_control - get control of the h/w from f/w
1445 * @adapter: address of board private structure
1447 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1448 * For ASF and Pass Through versions of f/w this means that
1449 * the driver is loaded.
1452 static void igb_get_hw_control(struct igb_adapter
*adapter
)
1454 struct e1000_hw
*hw
= &adapter
->hw
;
1457 /* Let firmware know the driver has taken over */
1458 ctrl_ext
= rd32(E1000_CTRL_EXT
);
1459 wr32(E1000_CTRL_EXT
,
1460 ctrl_ext
| E1000_CTRL_EXT_DRV_LOAD
);
1464 * igb_configure - configure the hardware for RX and TX
1465 * @adapter: private board structure
1467 static void igb_configure(struct igb_adapter
*adapter
)
1469 struct net_device
*netdev
= adapter
->netdev
;
1472 igb_get_hw_control(adapter
);
1473 igb_set_rx_mode(netdev
);
1475 igb_restore_vlan(adapter
);
1477 igb_setup_tctl(adapter
);
1478 igb_setup_mrqc(adapter
);
1479 igb_setup_rctl(adapter
);
1481 igb_configure_tx(adapter
);
1482 igb_configure_rx(adapter
);
1484 igb_rx_fifo_flush_82575(&adapter
->hw
);
1486 /* call igb_desc_unused which always leaves
1487 * at least 1 descriptor unused to make sure
1488 * next_to_use != next_to_clean */
1489 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1490 struct igb_ring
*ring
= adapter
->rx_ring
[i
];
1491 igb_alloc_rx_buffers(ring
, igb_desc_unused(ring
));
1496 * igb_power_up_link - Power up the phy/serdes link
1497 * @adapter: address of board private structure
1499 void igb_power_up_link(struct igb_adapter
*adapter
)
1501 if (adapter
->hw
.phy
.media_type
== e1000_media_type_copper
)
1502 igb_power_up_phy_copper(&adapter
->hw
);
1504 igb_power_up_serdes_link_82575(&adapter
->hw
);
1508 * igb_power_down_link - Power down the phy/serdes link
1509 * @adapter: address of board private structure
1511 static void igb_power_down_link(struct igb_adapter
*adapter
)
1513 if (adapter
->hw
.phy
.media_type
== e1000_media_type_copper
)
1514 igb_power_down_phy_copper_82575(&adapter
->hw
);
1516 igb_shutdown_serdes_link_82575(&adapter
->hw
);
1520 * igb_up - Open the interface and prepare it to handle traffic
1521 * @adapter: board private structure
1523 int igb_up(struct igb_adapter
*adapter
)
1525 struct e1000_hw
*hw
= &adapter
->hw
;
1528 /* hardware has been reset, we need to reload some things */
1529 igb_configure(adapter
);
1531 clear_bit(__IGB_DOWN
, &adapter
->state
);
1533 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1534 napi_enable(&(adapter
->q_vector
[i
]->napi
));
1536 if (adapter
->msix_entries
)
1537 igb_configure_msix(adapter
);
1539 igb_assign_vector(adapter
->q_vector
[0], 0);
1541 /* Clear any pending interrupts. */
1543 igb_irq_enable(adapter
);
1545 /* notify VFs that reset has been completed */
1546 if (adapter
->vfs_allocated_count
) {
1547 u32 reg_data
= rd32(E1000_CTRL_EXT
);
1548 reg_data
|= E1000_CTRL_EXT_PFRSTD
;
1549 wr32(E1000_CTRL_EXT
, reg_data
);
1552 netif_tx_start_all_queues(adapter
->netdev
);
1554 /* start the watchdog. */
1555 hw
->mac
.get_link_status
= 1;
1556 schedule_work(&adapter
->watchdog_task
);
1561 void igb_down(struct igb_adapter
*adapter
)
1563 struct net_device
*netdev
= adapter
->netdev
;
1564 struct e1000_hw
*hw
= &adapter
->hw
;
1568 /* signal that we're down so the interrupt handler does not
1569 * reschedule our watchdog timer */
1570 set_bit(__IGB_DOWN
, &adapter
->state
);
1572 /* disable receives in the hardware */
1573 rctl
= rd32(E1000_RCTL
);
1574 wr32(E1000_RCTL
, rctl
& ~E1000_RCTL_EN
);
1575 /* flush and sleep below */
1577 netif_tx_stop_all_queues(netdev
);
1579 /* disable transmits in the hardware */
1580 tctl
= rd32(E1000_TCTL
);
1581 tctl
&= ~E1000_TCTL_EN
;
1582 wr32(E1000_TCTL
, tctl
);
1583 /* flush both disables and wait for them to finish */
1587 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1588 napi_disable(&(adapter
->q_vector
[i
]->napi
));
1590 igb_irq_disable(adapter
);
1592 del_timer_sync(&adapter
->watchdog_timer
);
1593 del_timer_sync(&adapter
->phy_info_timer
);
1595 netif_carrier_off(netdev
);
1597 /* record the stats before reset*/
1598 spin_lock(&adapter
->stats64_lock
);
1599 igb_update_stats(adapter
, &adapter
->stats64
);
1600 spin_unlock(&adapter
->stats64_lock
);
1602 adapter
->link_speed
= 0;
1603 adapter
->link_duplex
= 0;
1605 if (!pci_channel_offline(adapter
->pdev
))
1607 igb_clean_all_tx_rings(adapter
);
1608 igb_clean_all_rx_rings(adapter
);
1609 #ifdef CONFIG_IGB_DCA
1611 /* since we reset the hardware DCA settings were cleared */
1612 igb_setup_dca(adapter
);
1616 void igb_reinit_locked(struct igb_adapter
*adapter
)
1618 WARN_ON(in_interrupt());
1619 while (test_and_set_bit(__IGB_RESETTING
, &adapter
->state
))
1623 clear_bit(__IGB_RESETTING
, &adapter
->state
);
1626 void igb_reset(struct igb_adapter
*adapter
)
1628 struct pci_dev
*pdev
= adapter
->pdev
;
1629 struct e1000_hw
*hw
= &adapter
->hw
;
1630 struct e1000_mac_info
*mac
= &hw
->mac
;
1631 struct e1000_fc_info
*fc
= &hw
->fc
;
1632 u32 pba
= 0, tx_space
, min_tx_space
, min_rx_space
;
1635 /* Repartition Pba for greater than 9k mtu
1636 * To take effect CTRL.RST is required.
1638 switch (mac
->type
) {
1641 pba
= rd32(E1000_RXPBS
);
1642 pba
= igb_rxpbs_adjust_82580(pba
);
1645 pba
= rd32(E1000_RXPBS
);
1646 pba
&= E1000_RXPBS_SIZE_MASK_82576
;
1650 pba
= E1000_PBA_34K
;
1654 if ((adapter
->max_frame_size
> ETH_FRAME_LEN
+ ETH_FCS_LEN
) &&
1655 (mac
->type
< e1000_82576
)) {
1656 /* adjust PBA for jumbo frames */
1657 wr32(E1000_PBA
, pba
);
1659 /* To maintain wire speed transmits, the Tx FIFO should be
1660 * large enough to accommodate two full transmit packets,
1661 * rounded up to the next 1KB and expressed in KB. Likewise,
1662 * the Rx FIFO should be large enough to accommodate at least
1663 * one full receive packet and is similarly rounded up and
1664 * expressed in KB. */
1665 pba
= rd32(E1000_PBA
);
1666 /* upper 16 bits has Tx packet buffer allocation size in KB */
1667 tx_space
= pba
>> 16;
1668 /* lower 16 bits has Rx packet buffer allocation size in KB */
1670 /* the tx fifo also stores 16 bytes of information about the tx
1671 * but don't include ethernet FCS because hardware appends it */
1672 min_tx_space
= (adapter
->max_frame_size
+
1673 sizeof(union e1000_adv_tx_desc
) -
1675 min_tx_space
= ALIGN(min_tx_space
, 1024);
1676 min_tx_space
>>= 10;
1677 /* software strips receive CRC, so leave room for it */
1678 min_rx_space
= adapter
->max_frame_size
;
1679 min_rx_space
= ALIGN(min_rx_space
, 1024);
1680 min_rx_space
>>= 10;
1682 /* If current Tx allocation is less than the min Tx FIFO size,
1683 * and the min Tx FIFO size is less than the current Rx FIFO
1684 * allocation, take space away from current Rx allocation */
1685 if (tx_space
< min_tx_space
&&
1686 ((min_tx_space
- tx_space
) < pba
)) {
1687 pba
= pba
- (min_tx_space
- tx_space
);
1689 /* if short on rx space, rx wins and must trump tx
1691 if (pba
< min_rx_space
)
1694 wr32(E1000_PBA
, pba
);
1697 /* flow control settings */
1698 /* The high water mark must be low enough to fit one full frame
1699 * (or the size used for early receive) above it in the Rx FIFO.
1700 * Set it to the lower of:
1701 * - 90% of the Rx FIFO size, or
1702 * - the full Rx FIFO size minus one full frame */
1703 hwm
= min(((pba
<< 10) * 9 / 10),
1704 ((pba
<< 10) - 2 * adapter
->max_frame_size
));
1706 fc
->high_water
= hwm
& 0xFFF0; /* 16-byte granularity */
1707 fc
->low_water
= fc
->high_water
- 16;
1708 fc
->pause_time
= 0xFFFF;
1710 fc
->current_mode
= fc
->requested_mode
;
1712 /* disable receive for all VFs and wait one second */
1713 if (adapter
->vfs_allocated_count
) {
1715 for (i
= 0 ; i
< adapter
->vfs_allocated_count
; i
++)
1716 adapter
->vf_data
[i
].flags
&= IGB_VF_FLAG_PF_SET_MAC
;
1718 /* ping all the active vfs to let them know we are going down */
1719 igb_ping_all_vfs(adapter
);
1721 /* disable transmits and receives */
1722 wr32(E1000_VFRE
, 0);
1723 wr32(E1000_VFTE
, 0);
1726 /* Allow time for pending master requests to run */
1727 hw
->mac
.ops
.reset_hw(hw
);
1730 if (hw
->mac
.ops
.init_hw(hw
))
1731 dev_err(&pdev
->dev
, "Hardware Error\n");
1733 igb_init_dmac(adapter
, pba
);
1734 if (!netif_running(adapter
->netdev
))
1735 igb_power_down_link(adapter
);
1737 igb_update_mng_vlan(adapter
);
1739 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1740 wr32(E1000_VET
, ETHERNET_IEEE_VLAN_TYPE
);
1742 igb_get_phy_info(hw
);
1745 static u32
igb_fix_features(struct net_device
*netdev
, u32 features
)
1748 * Since there is no support for separate rx/tx vlan accel
1749 * enable/disable make sure tx flag is always in same state as rx.
1751 if (features
& NETIF_F_HW_VLAN_RX
)
1752 features
|= NETIF_F_HW_VLAN_TX
;
1754 features
&= ~NETIF_F_HW_VLAN_TX
;
1759 static int igb_set_features(struct net_device
*netdev
, u32 features
)
1761 u32 changed
= netdev
->features
^ features
;
1763 if (changed
& NETIF_F_HW_VLAN_RX
)
1764 igb_vlan_mode(netdev
, features
);
1769 static const struct net_device_ops igb_netdev_ops
= {
1770 .ndo_open
= igb_open
,
1771 .ndo_stop
= igb_close
,
1772 .ndo_start_xmit
= igb_xmit_frame
,
1773 .ndo_get_stats64
= igb_get_stats64
,
1774 .ndo_set_rx_mode
= igb_set_rx_mode
,
1775 .ndo_set_mac_address
= igb_set_mac
,
1776 .ndo_change_mtu
= igb_change_mtu
,
1777 .ndo_do_ioctl
= igb_ioctl
,
1778 .ndo_tx_timeout
= igb_tx_timeout
,
1779 .ndo_validate_addr
= eth_validate_addr
,
1780 .ndo_vlan_rx_add_vid
= igb_vlan_rx_add_vid
,
1781 .ndo_vlan_rx_kill_vid
= igb_vlan_rx_kill_vid
,
1782 .ndo_set_vf_mac
= igb_ndo_set_vf_mac
,
1783 .ndo_set_vf_vlan
= igb_ndo_set_vf_vlan
,
1784 .ndo_set_vf_tx_rate
= igb_ndo_set_vf_bw
,
1785 .ndo_get_vf_config
= igb_ndo_get_vf_config
,
1786 #ifdef CONFIG_NET_POLL_CONTROLLER
1787 .ndo_poll_controller
= igb_netpoll
,
1789 .ndo_fix_features
= igb_fix_features
,
1790 .ndo_set_features
= igb_set_features
,
1794 * igb_probe - Device Initialization Routine
1795 * @pdev: PCI device information struct
1796 * @ent: entry in igb_pci_tbl
1798 * Returns 0 on success, negative on failure
1800 * igb_probe initializes an adapter identified by a pci_dev structure.
1801 * The OS initialization, configuring of the adapter private structure,
1802 * and a hardware reset occur.
1804 static int __devinit
igb_probe(struct pci_dev
*pdev
,
1805 const struct pci_device_id
*ent
)
1807 struct net_device
*netdev
;
1808 struct igb_adapter
*adapter
;
1809 struct e1000_hw
*hw
;
1810 u16 eeprom_data
= 0;
1812 static int global_quad_port_a
; /* global quad port a indication */
1813 const struct e1000_info
*ei
= igb_info_tbl
[ent
->driver_data
];
1814 unsigned long mmio_start
, mmio_len
;
1815 int err
, pci_using_dac
;
1816 u16 eeprom_apme_mask
= IGB_EEPROM_APME
;
1817 u8 part_str
[E1000_PBANUM_LENGTH
];
1819 /* Catch broken hardware that put the wrong VF device ID in
1820 * the PCIe SR-IOV capability.
1822 if (pdev
->is_virtfn
) {
1823 WARN(1, KERN_ERR
"%s (%hx:%hx) should not be a VF!\n",
1824 pci_name(pdev
), pdev
->vendor
, pdev
->device
);
1828 err
= pci_enable_device_mem(pdev
);
1833 err
= dma_set_mask(&pdev
->dev
, DMA_BIT_MASK(64));
1835 err
= dma_set_coherent_mask(&pdev
->dev
, DMA_BIT_MASK(64));
1839 err
= dma_set_mask(&pdev
->dev
, DMA_BIT_MASK(32));
1841 err
= dma_set_coherent_mask(&pdev
->dev
, DMA_BIT_MASK(32));
1843 dev_err(&pdev
->dev
, "No usable DMA "
1844 "configuration, aborting\n");
1850 err
= pci_request_selected_regions(pdev
, pci_select_bars(pdev
,
1856 pci_enable_pcie_error_reporting(pdev
);
1858 pci_set_master(pdev
);
1859 pci_save_state(pdev
);
1862 netdev
= alloc_etherdev_mq(sizeof(struct igb_adapter
),
1865 goto err_alloc_etherdev
;
1867 SET_NETDEV_DEV(netdev
, &pdev
->dev
);
1869 pci_set_drvdata(pdev
, netdev
);
1870 adapter
= netdev_priv(netdev
);
1871 adapter
->netdev
= netdev
;
1872 adapter
->pdev
= pdev
;
1875 adapter
->msg_enable
= NETIF_MSG_DRV
| NETIF_MSG_PROBE
;
1877 mmio_start
= pci_resource_start(pdev
, 0);
1878 mmio_len
= pci_resource_len(pdev
, 0);
1881 hw
->hw_addr
= ioremap(mmio_start
, mmio_len
);
1885 netdev
->netdev_ops
= &igb_netdev_ops
;
1886 igb_set_ethtool_ops(netdev
);
1887 netdev
->watchdog_timeo
= 5 * HZ
;
1889 strncpy(netdev
->name
, pci_name(pdev
), sizeof(netdev
->name
) - 1);
1891 netdev
->mem_start
= mmio_start
;
1892 netdev
->mem_end
= mmio_start
+ mmio_len
;
1894 /* PCI config space info */
1895 hw
->vendor_id
= pdev
->vendor
;
1896 hw
->device_id
= pdev
->device
;
1897 hw
->revision_id
= pdev
->revision
;
1898 hw
->subsystem_vendor_id
= pdev
->subsystem_vendor
;
1899 hw
->subsystem_device_id
= pdev
->subsystem_device
;
1901 /* Copy the default MAC, PHY and NVM function pointers */
1902 memcpy(&hw
->mac
.ops
, ei
->mac_ops
, sizeof(hw
->mac
.ops
));
1903 memcpy(&hw
->phy
.ops
, ei
->phy_ops
, sizeof(hw
->phy
.ops
));
1904 memcpy(&hw
->nvm
.ops
, ei
->nvm_ops
, sizeof(hw
->nvm
.ops
));
1905 /* Initialize skew-specific constants */
1906 err
= ei
->get_invariants(hw
);
1910 /* setup the private structure */
1911 err
= igb_sw_init(adapter
);
1915 igb_get_bus_info_pcie(hw
);
1917 hw
->phy
.autoneg_wait_to_complete
= false;
1919 /* Copper options */
1920 if (hw
->phy
.media_type
== e1000_media_type_copper
) {
1921 hw
->phy
.mdix
= AUTO_ALL_MODES
;
1922 hw
->phy
.disable_polarity_correction
= false;
1923 hw
->phy
.ms_type
= e1000_ms_hw_default
;
1926 if (igb_check_reset_block(hw
))
1927 dev_info(&pdev
->dev
,
1928 "PHY reset is blocked due to SOL/IDER session.\n");
1931 * features is initialized to 0 in allocation, it might have bits
1932 * set by igb_sw_init so we should use an or instead of an
1935 netdev
->features
|= NETIF_F_SG
|
1942 NETIF_F_HW_VLAN_RX
|
1945 /* copy netdev features into list of user selectable features */
1946 netdev
->hw_features
|= netdev
->features
;
1948 /* set this bit last since it cannot be part of hw_features */
1949 netdev
->features
|= NETIF_F_HW_VLAN_FILTER
;
1951 netdev
->vlan_features
|= NETIF_F_TSO
|
1957 if (pci_using_dac
) {
1958 netdev
->features
|= NETIF_F_HIGHDMA
;
1959 netdev
->vlan_features
|= NETIF_F_HIGHDMA
;
1962 if (hw
->mac
.type
>= e1000_82576
) {
1963 netdev
->hw_features
|= NETIF_F_SCTP_CSUM
;
1964 netdev
->features
|= NETIF_F_SCTP_CSUM
;
1967 netdev
->priv_flags
|= IFF_UNICAST_FLT
;
1969 adapter
->en_mng_pt
= igb_enable_mng_pass_thru(hw
);
1971 /* before reading the NVM, reset the controller to put the device in a
1972 * known good starting state */
1973 hw
->mac
.ops
.reset_hw(hw
);
1975 /* make sure the NVM is good */
1976 if (hw
->nvm
.ops
.validate(hw
) < 0) {
1977 dev_err(&pdev
->dev
, "The NVM Checksum Is Not Valid\n");
1982 /* copy the MAC address out of the NVM */
1983 if (hw
->mac
.ops
.read_mac_addr(hw
))
1984 dev_err(&pdev
->dev
, "NVM Read Error\n");
1986 memcpy(netdev
->dev_addr
, hw
->mac
.addr
, netdev
->addr_len
);
1987 memcpy(netdev
->perm_addr
, hw
->mac
.addr
, netdev
->addr_len
);
1989 if (!is_valid_ether_addr(netdev
->perm_addr
)) {
1990 dev_err(&pdev
->dev
, "Invalid MAC Address\n");
1995 setup_timer(&adapter
->watchdog_timer
, igb_watchdog
,
1996 (unsigned long) adapter
);
1997 setup_timer(&adapter
->phy_info_timer
, igb_update_phy_info
,
1998 (unsigned long) adapter
);
2000 INIT_WORK(&adapter
->reset_task
, igb_reset_task
);
2001 INIT_WORK(&adapter
->watchdog_task
, igb_watchdog_task
);
2003 /* Initialize link properties that are user-changeable */
2004 adapter
->fc_autoneg
= true;
2005 hw
->mac
.autoneg
= true;
2006 hw
->phy
.autoneg_advertised
= 0x2f;
2008 hw
->fc
.requested_mode
= e1000_fc_default
;
2009 hw
->fc
.current_mode
= e1000_fc_default
;
2011 igb_validate_mdi_setting(hw
);
2013 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2014 * enable the ACPI Magic Packet filter
2017 if (hw
->bus
.func
== 0)
2018 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_A
, 1, &eeprom_data
);
2019 else if (hw
->mac
.type
>= e1000_82580
)
2020 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_A
+
2021 NVM_82580_LAN_FUNC_OFFSET(hw
->bus
.func
), 1,
2023 else if (hw
->bus
.func
== 1)
2024 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_B
, 1, &eeprom_data
);
2026 if (eeprom_data
& eeprom_apme_mask
)
2027 adapter
->eeprom_wol
|= E1000_WUFC_MAG
;
2029 /* now that we have the eeprom settings, apply the special cases where
2030 * the eeprom may be wrong or the board simply won't support wake on
2031 * lan on a particular port */
2032 switch (pdev
->device
) {
2033 case E1000_DEV_ID_82575GB_QUAD_COPPER
:
2034 adapter
->eeprom_wol
= 0;
2036 case E1000_DEV_ID_82575EB_FIBER_SERDES
:
2037 case E1000_DEV_ID_82576_FIBER
:
2038 case E1000_DEV_ID_82576_SERDES
:
2039 /* Wake events only supported on port A for dual fiber
2040 * regardless of eeprom setting */
2041 if (rd32(E1000_STATUS
) & E1000_STATUS_FUNC_1
)
2042 adapter
->eeprom_wol
= 0;
2044 case E1000_DEV_ID_82576_QUAD_COPPER
:
2045 case E1000_DEV_ID_82576_QUAD_COPPER_ET2
:
2046 /* if quad port adapter, disable WoL on all but port A */
2047 if (global_quad_port_a
!= 0)
2048 adapter
->eeprom_wol
= 0;
2050 adapter
->flags
|= IGB_FLAG_QUAD_PORT_A
;
2051 /* Reset for multiple quad port adapters */
2052 if (++global_quad_port_a
== 4)
2053 global_quad_port_a
= 0;
2057 /* initialize the wol settings based on the eeprom settings */
2058 adapter
->wol
= adapter
->eeprom_wol
;
2059 device_set_wakeup_enable(&adapter
->pdev
->dev
, adapter
->wol
);
2061 /* reset the hardware with the new settings */
2064 /* let the f/w know that the h/w is now under the control of the
2066 igb_get_hw_control(adapter
);
2068 strcpy(netdev
->name
, "eth%d");
2069 err
= register_netdev(netdev
);
2073 /* carrier off reporting is important to ethtool even BEFORE open */
2074 netif_carrier_off(netdev
);
2076 #ifdef CONFIG_IGB_DCA
2077 if (dca_add_requester(&pdev
->dev
) == 0) {
2078 adapter
->flags
|= IGB_FLAG_DCA_ENABLED
;
2079 dev_info(&pdev
->dev
, "DCA enabled\n");
2080 igb_setup_dca(adapter
);
2084 /* do hw tstamp init after resetting */
2085 igb_init_hw_timer(adapter
);
2087 dev_info(&pdev
->dev
, "Intel(R) Gigabit Ethernet Network Connection\n");
2088 /* print bus type/speed/width info */
2089 dev_info(&pdev
->dev
, "%s: (PCIe:%s:%s) %pM\n",
2091 ((hw
->bus
.speed
== e1000_bus_speed_2500
) ? "2.5Gb/s" :
2092 (hw
->bus
.speed
== e1000_bus_speed_5000
) ? "5.0Gb/s" :
2094 ((hw
->bus
.width
== e1000_bus_width_pcie_x4
) ? "Width x4" :
2095 (hw
->bus
.width
== e1000_bus_width_pcie_x2
) ? "Width x2" :
2096 (hw
->bus
.width
== e1000_bus_width_pcie_x1
) ? "Width x1" :
2100 ret_val
= igb_read_part_string(hw
, part_str
, E1000_PBANUM_LENGTH
);
2102 strcpy(part_str
, "Unknown");
2103 dev_info(&pdev
->dev
, "%s: PBA No: %s\n", netdev
->name
, part_str
);
2104 dev_info(&pdev
->dev
,
2105 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2106 adapter
->msix_entries
? "MSI-X" :
2107 (adapter
->flags
& IGB_FLAG_HAS_MSI
) ? "MSI" : "legacy",
2108 adapter
->num_rx_queues
, adapter
->num_tx_queues
);
2109 switch (hw
->mac
.type
) {
2111 igb_set_eee_i350(hw
);
2119 igb_release_hw_control(adapter
);
2121 if (!igb_check_reset_block(hw
))
2124 if (hw
->flash_address
)
2125 iounmap(hw
->flash_address
);
2127 igb_clear_interrupt_scheme(adapter
);
2128 iounmap(hw
->hw_addr
);
2130 free_netdev(netdev
);
2132 pci_release_selected_regions(pdev
,
2133 pci_select_bars(pdev
, IORESOURCE_MEM
));
2136 pci_disable_device(pdev
);
2141 * igb_remove - Device Removal Routine
2142 * @pdev: PCI device information struct
2144 * igb_remove is called by the PCI subsystem to alert the driver
2145 * that it should release a PCI device. The could be caused by a
2146 * Hot-Plug event, or because the driver is going to be removed from
2149 static void __devexit
igb_remove(struct pci_dev
*pdev
)
2151 struct net_device
*netdev
= pci_get_drvdata(pdev
);
2152 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2153 struct e1000_hw
*hw
= &adapter
->hw
;
2156 * The watchdog timer may be rescheduled, so explicitly
2157 * disable watchdog from being rescheduled.
2159 set_bit(__IGB_DOWN
, &adapter
->state
);
2160 del_timer_sync(&adapter
->watchdog_timer
);
2161 del_timer_sync(&adapter
->phy_info_timer
);
2163 cancel_work_sync(&adapter
->reset_task
);
2164 cancel_work_sync(&adapter
->watchdog_task
);
2166 #ifdef CONFIG_IGB_DCA
2167 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
) {
2168 dev_info(&pdev
->dev
, "DCA disabled\n");
2169 dca_remove_requester(&pdev
->dev
);
2170 adapter
->flags
&= ~IGB_FLAG_DCA_ENABLED
;
2171 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_DISABLE
);
2175 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2176 * would have already happened in close and is redundant. */
2177 igb_release_hw_control(adapter
);
2179 unregister_netdev(netdev
);
2181 igb_clear_interrupt_scheme(adapter
);
2183 #ifdef CONFIG_PCI_IOV
2184 /* reclaim resources allocated to VFs */
2185 if (adapter
->vf_data
) {
2186 /* disable iov and allow time for transactions to clear */
2187 if (!igb_check_vf_assignment(adapter
)) {
2188 pci_disable_sriov(pdev
);
2191 dev_info(&pdev
->dev
, "VF(s) assigned to guests!\n");
2194 kfree(adapter
->vf_data
);
2195 adapter
->vf_data
= NULL
;
2196 wr32(E1000_IOVCTL
, E1000_IOVCTL_REUSE_VFQ
);
2199 dev_info(&pdev
->dev
, "IOV Disabled\n");
2203 iounmap(hw
->hw_addr
);
2204 if (hw
->flash_address
)
2205 iounmap(hw
->flash_address
);
2206 pci_release_selected_regions(pdev
,
2207 pci_select_bars(pdev
, IORESOURCE_MEM
));
2209 free_netdev(netdev
);
2211 pci_disable_pcie_error_reporting(pdev
);
2213 pci_disable_device(pdev
);
2217 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2218 * @adapter: board private structure to initialize
2220 * This function initializes the vf specific data storage and then attempts to
2221 * allocate the VFs. The reason for ordering it this way is because it is much
2222 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2223 * the memory for the VFs.
2225 static void __devinit
igb_probe_vfs(struct igb_adapter
* adapter
)
2227 #ifdef CONFIG_PCI_IOV
2228 struct pci_dev
*pdev
= adapter
->pdev
;
2229 int old_vfs
= igb_find_enabled_vfs(adapter
);
2233 dev_info(&pdev
->dev
, "%d pre-allocated VFs found - override "
2234 "max_vfs setting of %d\n", old_vfs
, max_vfs
);
2235 adapter
->vfs_allocated_count
= old_vfs
;
2238 if (!adapter
->vfs_allocated_count
)
2241 adapter
->vf_data
= kcalloc(adapter
->vfs_allocated_count
,
2242 sizeof(struct vf_data_storage
), GFP_KERNEL
);
2243 /* if allocation failed then we do not support SR-IOV */
2244 if (!adapter
->vf_data
) {
2245 adapter
->vfs_allocated_count
= 0;
2246 dev_err(&pdev
->dev
, "Unable to allocate memory for VF "
2252 if (pci_enable_sriov(pdev
, adapter
->vfs_allocated_count
))
2255 dev_info(&pdev
->dev
, "%d VFs allocated\n",
2256 adapter
->vfs_allocated_count
);
2257 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++)
2258 igb_vf_configure(adapter
, i
);
2260 /* DMA Coalescing is not supported in IOV mode. */
2261 adapter
->flags
&= ~IGB_FLAG_DMAC
;
2264 kfree(adapter
->vf_data
);
2265 adapter
->vf_data
= NULL
;
2266 adapter
->vfs_allocated_count
= 0;
2269 #endif /* CONFIG_PCI_IOV */
2273 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2274 * @adapter: board private structure to initialize
2276 * igb_init_hw_timer initializes the function pointer and values for the hw
2277 * timer found in hardware.
2279 static void igb_init_hw_timer(struct igb_adapter
*adapter
)
2281 struct e1000_hw
*hw
= &adapter
->hw
;
2283 switch (hw
->mac
.type
) {
2286 memset(&adapter
->cycles
, 0, sizeof(adapter
->cycles
));
2287 adapter
->cycles
.read
= igb_read_clock
;
2288 adapter
->cycles
.mask
= CLOCKSOURCE_MASK(64);
2289 adapter
->cycles
.mult
= 1;
2291 * The 82580 timesync updates the system timer every 8ns by 8ns
2292 * and the value cannot be shifted. Instead we need to shift
2293 * the registers to generate a 64bit timer value. As a result
2294 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2295 * 24 in order to generate a larger value for synchronization.
2297 adapter
->cycles
.shift
= IGB_82580_TSYNC_SHIFT
;
2298 /* disable system timer temporarily by setting bit 31 */
2299 wr32(E1000_TSAUXC
, 0x80000000);
2302 /* Set registers so that rollover occurs soon to test this. */
2303 wr32(E1000_SYSTIMR
, 0x00000000);
2304 wr32(E1000_SYSTIML
, 0x80000000);
2305 wr32(E1000_SYSTIMH
, 0x000000FF);
2308 /* enable system timer by clearing bit 31 */
2309 wr32(E1000_TSAUXC
, 0x0);
2312 timecounter_init(&adapter
->clock
,
2314 ktime_to_ns(ktime_get_real()));
2316 * Synchronize our NIC clock against system wall clock. NIC
2317 * time stamp reading requires ~3us per sample, each sample
2318 * was pretty stable even under load => only require 10
2319 * samples for each offset comparison.
2321 memset(&adapter
->compare
, 0, sizeof(adapter
->compare
));
2322 adapter
->compare
.source
= &adapter
->clock
;
2323 adapter
->compare
.target
= ktime_get_real
;
2324 adapter
->compare
.num_samples
= 10;
2325 timecompare_update(&adapter
->compare
, 0);
2329 * Initialize hardware timer: we keep it running just in case
2330 * that some program needs it later on.
2332 memset(&adapter
->cycles
, 0, sizeof(adapter
->cycles
));
2333 adapter
->cycles
.read
= igb_read_clock
;
2334 adapter
->cycles
.mask
= CLOCKSOURCE_MASK(64);
2335 adapter
->cycles
.mult
= 1;
2337 * Scale the NIC clock cycle by a large factor so that
2338 * relatively small clock corrections can be added or
2339 * subtracted at each clock tick. The drawbacks of a large
2340 * factor are a) that the clock register overflows more quickly
2341 * (not such a big deal) and b) that the increment per tick has
2342 * to fit into 24 bits. As a result we need to use a shift of
2343 * 19 so we can fit a value of 16 into the TIMINCA register.
2345 adapter
->cycles
.shift
= IGB_82576_TSYNC_SHIFT
;
2347 (1 << E1000_TIMINCA_16NS_SHIFT
) |
2348 (16 << IGB_82576_TSYNC_SHIFT
));
2350 /* Set registers so that rollover occurs soon to test this. */
2351 wr32(E1000_SYSTIML
, 0x00000000);
2352 wr32(E1000_SYSTIMH
, 0xFF800000);
2355 timecounter_init(&adapter
->clock
,
2357 ktime_to_ns(ktime_get_real()));
2359 * Synchronize our NIC clock against system wall clock. NIC
2360 * time stamp reading requires ~3us per sample, each sample
2361 * was pretty stable even under load => only require 10
2362 * samples for each offset comparison.
2364 memset(&adapter
->compare
, 0, sizeof(adapter
->compare
));
2365 adapter
->compare
.source
= &adapter
->clock
;
2366 adapter
->compare
.target
= ktime_get_real
;
2367 adapter
->compare
.num_samples
= 10;
2368 timecompare_update(&adapter
->compare
, 0);
2371 /* 82575 does not support timesync */
2379 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2380 * @adapter: board private structure to initialize
2382 * igb_sw_init initializes the Adapter private data structure.
2383 * Fields are initialized based on PCI device information and
2384 * OS network device settings (MTU size).
2386 static int __devinit
igb_sw_init(struct igb_adapter
*adapter
)
2388 struct e1000_hw
*hw
= &adapter
->hw
;
2389 struct net_device
*netdev
= adapter
->netdev
;
2390 struct pci_dev
*pdev
= adapter
->pdev
;
2392 pci_read_config_word(pdev
, PCI_COMMAND
, &hw
->bus
.pci_cmd_word
);
2394 /* set default ring sizes */
2395 adapter
->tx_ring_count
= IGB_DEFAULT_TXD
;
2396 adapter
->rx_ring_count
= IGB_DEFAULT_RXD
;
2398 /* set default ITR values */
2399 adapter
->rx_itr_setting
= IGB_DEFAULT_ITR
;
2400 adapter
->tx_itr_setting
= IGB_DEFAULT_ITR
;
2402 /* set default work limits */
2403 adapter
->tx_work_limit
= IGB_DEFAULT_TX_WORK
;
2405 adapter
->max_frame_size
= netdev
->mtu
+ ETH_HLEN
+ ETH_FCS_LEN
+
2407 adapter
->min_frame_size
= ETH_ZLEN
+ ETH_FCS_LEN
;
2411 spin_lock_init(&adapter
->stats64_lock
);
2412 #ifdef CONFIG_PCI_IOV
2413 switch (hw
->mac
.type
) {
2417 dev_warn(&pdev
->dev
,
2418 "Maximum of 7 VFs per PF, using max\n");
2419 adapter
->vfs_allocated_count
= 7;
2421 adapter
->vfs_allocated_count
= max_vfs
;
2426 #endif /* CONFIG_PCI_IOV */
2427 adapter
->rss_queues
= min_t(u32
, IGB_MAX_RX_QUEUES
, num_online_cpus());
2428 /* i350 cannot do RSS and SR-IOV at the same time */
2429 if (hw
->mac
.type
== e1000_i350
&& adapter
->vfs_allocated_count
)
2430 adapter
->rss_queues
= 1;
2433 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2434 * then we should combine the queues into a queue pair in order to
2435 * conserve interrupts due to limited supply
2437 if ((adapter
->rss_queues
> 4) ||
2438 ((adapter
->rss_queues
> 1) && (adapter
->vfs_allocated_count
> 6)))
2439 adapter
->flags
|= IGB_FLAG_QUEUE_PAIRS
;
2441 /* This call may decrease the number of queues */
2442 if (igb_init_interrupt_scheme(adapter
)) {
2443 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
2447 igb_probe_vfs(adapter
);
2449 /* Explicitly disable IRQ since the NIC can be in any state. */
2450 igb_irq_disable(adapter
);
2452 if (hw
->mac
.type
== e1000_i350
)
2453 adapter
->flags
&= ~IGB_FLAG_DMAC
;
2455 set_bit(__IGB_DOWN
, &adapter
->state
);
2460 * igb_open - Called when a network interface is made active
2461 * @netdev: network interface device structure
2463 * Returns 0 on success, negative value on failure
2465 * The open entry point is called when a network interface is made
2466 * active by the system (IFF_UP). At this point all resources needed
2467 * for transmit and receive operations are allocated, the interrupt
2468 * handler is registered with the OS, the watchdog timer is started,
2469 * and the stack is notified that the interface is ready.
2471 static int igb_open(struct net_device
*netdev
)
2473 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2474 struct e1000_hw
*hw
= &adapter
->hw
;
2478 /* disallow open during test */
2479 if (test_bit(__IGB_TESTING
, &adapter
->state
))
2482 netif_carrier_off(netdev
);
2484 /* allocate transmit descriptors */
2485 err
= igb_setup_all_tx_resources(adapter
);
2489 /* allocate receive descriptors */
2490 err
= igb_setup_all_rx_resources(adapter
);
2494 igb_power_up_link(adapter
);
2496 /* before we allocate an interrupt, we must be ready to handle it.
2497 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2498 * as soon as we call pci_request_irq, so we have to setup our
2499 * clean_rx handler before we do so. */
2500 igb_configure(adapter
);
2502 err
= igb_request_irq(adapter
);
2506 /* From here on the code is the same as igb_up() */
2507 clear_bit(__IGB_DOWN
, &adapter
->state
);
2509 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
2510 napi_enable(&(adapter
->q_vector
[i
]->napi
));
2512 /* Clear any pending interrupts. */
2515 igb_irq_enable(adapter
);
2517 /* notify VFs that reset has been completed */
2518 if (adapter
->vfs_allocated_count
) {
2519 u32 reg_data
= rd32(E1000_CTRL_EXT
);
2520 reg_data
|= E1000_CTRL_EXT_PFRSTD
;
2521 wr32(E1000_CTRL_EXT
, reg_data
);
2524 netif_tx_start_all_queues(netdev
);
2526 /* start the watchdog. */
2527 hw
->mac
.get_link_status
= 1;
2528 schedule_work(&adapter
->watchdog_task
);
2533 igb_release_hw_control(adapter
);
2534 igb_power_down_link(adapter
);
2535 igb_free_all_rx_resources(adapter
);
2537 igb_free_all_tx_resources(adapter
);
2545 * igb_close - Disables a network interface
2546 * @netdev: network interface device structure
2548 * Returns 0, this is not allowed to fail
2550 * The close entry point is called when an interface is de-activated
2551 * by the OS. The hardware is still under the driver's control, but
2552 * needs to be disabled. A global MAC reset is issued to stop the
2553 * hardware, and all transmit and receive resources are freed.
2555 static int igb_close(struct net_device
*netdev
)
2557 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2559 WARN_ON(test_bit(__IGB_RESETTING
, &adapter
->state
));
2562 igb_free_irq(adapter
);
2564 igb_free_all_tx_resources(adapter
);
2565 igb_free_all_rx_resources(adapter
);
2571 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2572 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2574 * Return 0 on success, negative on failure
2576 int igb_setup_tx_resources(struct igb_ring
*tx_ring
)
2578 struct device
*dev
= tx_ring
->dev
;
2579 int orig_node
= dev_to_node(dev
);
2582 size
= sizeof(struct igb_tx_buffer
) * tx_ring
->count
;
2583 tx_ring
->tx_buffer_info
= vzalloc_node(size
, tx_ring
->numa_node
);
2584 if (!tx_ring
->tx_buffer_info
)
2585 tx_ring
->tx_buffer_info
= vzalloc(size
);
2586 if (!tx_ring
->tx_buffer_info
)
2589 /* round up to nearest 4K */
2590 tx_ring
->size
= tx_ring
->count
* sizeof(union e1000_adv_tx_desc
);
2591 tx_ring
->size
= ALIGN(tx_ring
->size
, 4096);
2593 set_dev_node(dev
, tx_ring
->numa_node
);
2594 tx_ring
->desc
= dma_alloc_coherent(dev
,
2598 set_dev_node(dev
, orig_node
);
2600 tx_ring
->desc
= dma_alloc_coherent(dev
,
2608 tx_ring
->next_to_use
= 0;
2609 tx_ring
->next_to_clean
= 0;
2614 vfree(tx_ring
->tx_buffer_info
);
2616 "Unable to allocate memory for the transmit descriptor ring\n");
2621 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2622 * (Descriptors) for all queues
2623 * @adapter: board private structure
2625 * Return 0 on success, negative on failure
2627 static int igb_setup_all_tx_resources(struct igb_adapter
*adapter
)
2629 struct pci_dev
*pdev
= adapter
->pdev
;
2632 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
2633 err
= igb_setup_tx_resources(adapter
->tx_ring
[i
]);
2636 "Allocation for Tx Queue %u failed\n", i
);
2637 for (i
--; i
>= 0; i
--)
2638 igb_free_tx_resources(adapter
->tx_ring
[i
]);
2647 * igb_setup_tctl - configure the transmit control registers
2648 * @adapter: Board private structure
2650 void igb_setup_tctl(struct igb_adapter
*adapter
)
2652 struct e1000_hw
*hw
= &adapter
->hw
;
2655 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2656 wr32(E1000_TXDCTL(0), 0);
2658 /* Program the Transmit Control Register */
2659 tctl
= rd32(E1000_TCTL
);
2660 tctl
&= ~E1000_TCTL_CT
;
2661 tctl
|= E1000_TCTL_PSP
| E1000_TCTL_RTLC
|
2662 (E1000_COLLISION_THRESHOLD
<< E1000_CT_SHIFT
);
2664 igb_config_collision_dist(hw
);
2666 /* Enable transmits */
2667 tctl
|= E1000_TCTL_EN
;
2669 wr32(E1000_TCTL
, tctl
);
2673 * igb_configure_tx_ring - Configure transmit ring after Reset
2674 * @adapter: board private structure
2675 * @ring: tx ring to configure
2677 * Configure a transmit ring after a reset.
2679 void igb_configure_tx_ring(struct igb_adapter
*adapter
,
2680 struct igb_ring
*ring
)
2682 struct e1000_hw
*hw
= &adapter
->hw
;
2684 u64 tdba
= ring
->dma
;
2685 int reg_idx
= ring
->reg_idx
;
2687 /* disable the queue */
2688 wr32(E1000_TXDCTL(reg_idx
), 0);
2692 wr32(E1000_TDLEN(reg_idx
),
2693 ring
->count
* sizeof(union e1000_adv_tx_desc
));
2694 wr32(E1000_TDBAL(reg_idx
),
2695 tdba
& 0x00000000ffffffffULL
);
2696 wr32(E1000_TDBAH(reg_idx
), tdba
>> 32);
2698 ring
->tail
= hw
->hw_addr
+ E1000_TDT(reg_idx
);
2699 wr32(E1000_TDH(reg_idx
), 0);
2700 writel(0, ring
->tail
);
2702 txdctl
|= IGB_TX_PTHRESH
;
2703 txdctl
|= IGB_TX_HTHRESH
<< 8;
2704 txdctl
|= IGB_TX_WTHRESH
<< 16;
2706 txdctl
|= E1000_TXDCTL_QUEUE_ENABLE
;
2707 wr32(E1000_TXDCTL(reg_idx
), txdctl
);
2711 * igb_configure_tx - Configure transmit Unit after Reset
2712 * @adapter: board private structure
2714 * Configure the Tx unit of the MAC after a reset.
2716 static void igb_configure_tx(struct igb_adapter
*adapter
)
2720 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
2721 igb_configure_tx_ring(adapter
, adapter
->tx_ring
[i
]);
2725 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2726 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2728 * Returns 0 on success, negative on failure
2730 int igb_setup_rx_resources(struct igb_ring
*rx_ring
)
2732 struct device
*dev
= rx_ring
->dev
;
2733 int orig_node
= dev_to_node(dev
);
2736 size
= sizeof(struct igb_rx_buffer
) * rx_ring
->count
;
2737 rx_ring
->rx_buffer_info
= vzalloc_node(size
, rx_ring
->numa_node
);
2738 if (!rx_ring
->rx_buffer_info
)
2739 rx_ring
->rx_buffer_info
= vzalloc(size
);
2740 if (!rx_ring
->rx_buffer_info
)
2743 desc_len
= sizeof(union e1000_adv_rx_desc
);
2745 /* Round up to nearest 4K */
2746 rx_ring
->size
= rx_ring
->count
* desc_len
;
2747 rx_ring
->size
= ALIGN(rx_ring
->size
, 4096);
2749 set_dev_node(dev
, rx_ring
->numa_node
);
2750 rx_ring
->desc
= dma_alloc_coherent(dev
,
2754 set_dev_node(dev
, orig_node
);
2756 rx_ring
->desc
= dma_alloc_coherent(dev
,
2764 rx_ring
->next_to_clean
= 0;
2765 rx_ring
->next_to_use
= 0;
2770 vfree(rx_ring
->rx_buffer_info
);
2771 rx_ring
->rx_buffer_info
= NULL
;
2772 dev_err(dev
, "Unable to allocate memory for the receive descriptor"
2778 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2779 * (Descriptors) for all queues
2780 * @adapter: board private structure
2782 * Return 0 on success, negative on failure
2784 static int igb_setup_all_rx_resources(struct igb_adapter
*adapter
)
2786 struct pci_dev
*pdev
= adapter
->pdev
;
2789 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
2790 err
= igb_setup_rx_resources(adapter
->rx_ring
[i
]);
2793 "Allocation for Rx Queue %u failed\n", i
);
2794 for (i
--; i
>= 0; i
--)
2795 igb_free_rx_resources(adapter
->rx_ring
[i
]);
2804 * igb_setup_mrqc - configure the multiple receive queue control registers
2805 * @adapter: Board private structure
2807 static void igb_setup_mrqc(struct igb_adapter
*adapter
)
2809 struct e1000_hw
*hw
= &adapter
->hw
;
2811 u32 j
, num_rx_queues
, shift
= 0, shift2
= 0;
2816 static const u8 rsshash
[40] = {
2817 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2818 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2819 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2820 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2822 /* Fill out hash function seeds */
2823 for (j
= 0; j
< 10; j
++) {
2824 u32 rsskey
= rsshash
[(j
* 4)];
2825 rsskey
|= rsshash
[(j
* 4) + 1] << 8;
2826 rsskey
|= rsshash
[(j
* 4) + 2] << 16;
2827 rsskey
|= rsshash
[(j
* 4) + 3] << 24;
2828 array_wr32(E1000_RSSRK(0), j
, rsskey
);
2831 num_rx_queues
= adapter
->rss_queues
;
2833 if (adapter
->vfs_allocated_count
) {
2834 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2835 switch (hw
->mac
.type
) {
2852 if (hw
->mac
.type
== e1000_82575
)
2856 for (j
= 0; j
< (32 * 4); j
++) {
2857 reta
.bytes
[j
& 3] = (j
% num_rx_queues
) << shift
;
2859 reta
.bytes
[j
& 3] |= num_rx_queues
<< shift2
;
2861 wr32(E1000_RETA(j
>> 2), reta
.dword
);
2865 * Disable raw packet checksumming so that RSS hash is placed in
2866 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2867 * offloads as they are enabled by default
2869 rxcsum
= rd32(E1000_RXCSUM
);
2870 rxcsum
|= E1000_RXCSUM_PCSD
;
2872 if (adapter
->hw
.mac
.type
>= e1000_82576
)
2873 /* Enable Receive Checksum Offload for SCTP */
2874 rxcsum
|= E1000_RXCSUM_CRCOFL
;
2876 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2877 wr32(E1000_RXCSUM
, rxcsum
);
2879 /* If VMDq is enabled then we set the appropriate mode for that, else
2880 * we default to RSS so that an RSS hash is calculated per packet even
2881 * if we are only using one queue */
2882 if (adapter
->vfs_allocated_count
) {
2883 if (hw
->mac
.type
> e1000_82575
) {
2884 /* Set the default pool for the PF's first queue */
2885 u32 vtctl
= rd32(E1000_VT_CTL
);
2886 vtctl
&= ~(E1000_VT_CTL_DEFAULT_POOL_MASK
|
2887 E1000_VT_CTL_DISABLE_DEF_POOL
);
2888 vtctl
|= adapter
->vfs_allocated_count
<<
2889 E1000_VT_CTL_DEFAULT_POOL_SHIFT
;
2890 wr32(E1000_VT_CTL
, vtctl
);
2892 if (adapter
->rss_queues
> 1)
2893 mrqc
= E1000_MRQC_ENABLE_VMDQ_RSS_2Q
;
2895 mrqc
= E1000_MRQC_ENABLE_VMDQ
;
2897 mrqc
= E1000_MRQC_ENABLE_RSS_4Q
;
2899 igb_vmm_control(adapter
);
2902 * Generate RSS hash based on TCP port numbers and/or
2903 * IPv4/v6 src and dst addresses since UDP cannot be
2904 * hashed reliably due to IP fragmentation
2906 mrqc
|= E1000_MRQC_RSS_FIELD_IPV4
|
2907 E1000_MRQC_RSS_FIELD_IPV4_TCP
|
2908 E1000_MRQC_RSS_FIELD_IPV6
|
2909 E1000_MRQC_RSS_FIELD_IPV6_TCP
|
2910 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX
;
2912 wr32(E1000_MRQC
, mrqc
);
2916 * igb_setup_rctl - configure the receive control registers
2917 * @adapter: Board private structure
2919 void igb_setup_rctl(struct igb_adapter
*adapter
)
2921 struct e1000_hw
*hw
= &adapter
->hw
;
2924 rctl
= rd32(E1000_RCTL
);
2926 rctl
&= ~(3 << E1000_RCTL_MO_SHIFT
);
2927 rctl
&= ~(E1000_RCTL_LBM_TCVR
| E1000_RCTL_LBM_MAC
);
2929 rctl
|= E1000_RCTL_EN
| E1000_RCTL_BAM
| E1000_RCTL_RDMTS_HALF
|
2930 (hw
->mac
.mc_filter_type
<< E1000_RCTL_MO_SHIFT
);
2933 * enable stripping of CRC. It's unlikely this will break BMC
2934 * redirection as it did with e1000. Newer features require
2935 * that the HW strips the CRC.
2937 rctl
|= E1000_RCTL_SECRC
;
2939 /* disable store bad packets and clear size bits. */
2940 rctl
&= ~(E1000_RCTL_SBP
| E1000_RCTL_SZ_256
);
2942 /* enable LPE to prevent packets larger than max_frame_size */
2943 rctl
|= E1000_RCTL_LPE
;
2945 /* disable queue 0 to prevent tail write w/o re-config */
2946 wr32(E1000_RXDCTL(0), 0);
2948 /* Attention!!! For SR-IOV PF driver operations you must enable
2949 * queue drop for all VF and PF queues to prevent head of line blocking
2950 * if an un-trusted VF does not provide descriptors to hardware.
2952 if (adapter
->vfs_allocated_count
) {
2953 /* set all queue drop enable bits */
2954 wr32(E1000_QDE
, ALL_QUEUES
);
2957 wr32(E1000_RCTL
, rctl
);
2960 static inline int igb_set_vf_rlpml(struct igb_adapter
*adapter
, int size
,
2963 struct e1000_hw
*hw
= &adapter
->hw
;
2966 /* if it isn't the PF check to see if VFs are enabled and
2967 * increase the size to support vlan tags */
2968 if (vfn
< adapter
->vfs_allocated_count
&&
2969 adapter
->vf_data
[vfn
].vlans_enabled
)
2970 size
+= VLAN_TAG_SIZE
;
2972 vmolr
= rd32(E1000_VMOLR(vfn
));
2973 vmolr
&= ~E1000_VMOLR_RLPML_MASK
;
2974 vmolr
|= size
| E1000_VMOLR_LPE
;
2975 wr32(E1000_VMOLR(vfn
), vmolr
);
2981 * igb_rlpml_set - set maximum receive packet size
2982 * @adapter: board private structure
2984 * Configure maximum receivable packet size.
2986 static void igb_rlpml_set(struct igb_adapter
*adapter
)
2988 u32 max_frame_size
= adapter
->max_frame_size
;
2989 struct e1000_hw
*hw
= &adapter
->hw
;
2990 u16 pf_id
= adapter
->vfs_allocated_count
;
2993 igb_set_vf_rlpml(adapter
, max_frame_size
, pf_id
);
2995 * If we're in VMDQ or SR-IOV mode, then set global RLPML
2996 * to our max jumbo frame size, in case we need to enable
2997 * jumbo frames on one of the rings later.
2998 * This will not pass over-length frames into the default
2999 * queue because it's gated by the VMOLR.RLPML.
3001 max_frame_size
= MAX_JUMBO_FRAME_SIZE
;
3004 wr32(E1000_RLPML
, max_frame_size
);
3007 static inline void igb_set_vmolr(struct igb_adapter
*adapter
,
3010 struct e1000_hw
*hw
= &adapter
->hw
;
3014 * This register exists only on 82576 and newer so if we are older then
3015 * we should exit and do nothing
3017 if (hw
->mac
.type
< e1000_82576
)
3020 vmolr
= rd32(E1000_VMOLR(vfn
));
3021 vmolr
|= E1000_VMOLR_STRVLAN
; /* Strip vlan tags */
3023 vmolr
|= E1000_VMOLR_AUPE
; /* Accept untagged packets */
3025 vmolr
&= ~(E1000_VMOLR_AUPE
); /* Tagged packets ONLY */
3027 /* clear all bits that might not be set */
3028 vmolr
&= ~(E1000_VMOLR_BAM
| E1000_VMOLR_RSSE
);
3030 if (adapter
->rss_queues
> 1 && vfn
== adapter
->vfs_allocated_count
)
3031 vmolr
|= E1000_VMOLR_RSSE
; /* enable RSS */
3033 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3036 if (vfn
<= adapter
->vfs_allocated_count
)
3037 vmolr
|= E1000_VMOLR_BAM
; /* Accept broadcast */
3039 wr32(E1000_VMOLR(vfn
), vmolr
);
3043 * igb_configure_rx_ring - Configure a receive ring after Reset
3044 * @adapter: board private structure
3045 * @ring: receive ring to be configured
3047 * Configure the Rx unit of the MAC after a reset.
3049 void igb_configure_rx_ring(struct igb_adapter
*adapter
,
3050 struct igb_ring
*ring
)
3052 struct e1000_hw
*hw
= &adapter
->hw
;
3053 u64 rdba
= ring
->dma
;
3054 int reg_idx
= ring
->reg_idx
;
3055 u32 srrctl
= 0, rxdctl
= 0;
3057 /* disable the queue */
3058 wr32(E1000_RXDCTL(reg_idx
), 0);
3060 /* Set DMA base address registers */
3061 wr32(E1000_RDBAL(reg_idx
),
3062 rdba
& 0x00000000ffffffffULL
);
3063 wr32(E1000_RDBAH(reg_idx
), rdba
>> 32);
3064 wr32(E1000_RDLEN(reg_idx
),
3065 ring
->count
* sizeof(union e1000_adv_rx_desc
));
3067 /* initialize head and tail */
3068 ring
->tail
= hw
->hw_addr
+ E1000_RDT(reg_idx
);
3069 wr32(E1000_RDH(reg_idx
), 0);
3070 writel(0, ring
->tail
);
3072 /* set descriptor configuration */
3073 srrctl
= IGB_RX_HDR_LEN
<< E1000_SRRCTL_BSIZEHDRSIZE_SHIFT
;
3074 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3075 srrctl
|= IGB_RXBUFFER_16384
>> E1000_SRRCTL_BSIZEPKT_SHIFT
;
3077 srrctl
|= (PAGE_SIZE
/ 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT
;
3079 srrctl
|= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS
;
3080 if (hw
->mac
.type
>= e1000_82580
)
3081 srrctl
|= E1000_SRRCTL_TIMESTAMP
;
3082 /* Only set Drop Enable if we are supporting multiple queues */
3083 if (adapter
->vfs_allocated_count
|| adapter
->num_rx_queues
> 1)
3084 srrctl
|= E1000_SRRCTL_DROP_EN
;
3086 wr32(E1000_SRRCTL(reg_idx
), srrctl
);
3088 /* set filtering for VMDQ pools */
3089 igb_set_vmolr(adapter
, reg_idx
& 0x7, true);
3091 rxdctl
|= IGB_RX_PTHRESH
;
3092 rxdctl
|= IGB_RX_HTHRESH
<< 8;
3093 rxdctl
|= IGB_RX_WTHRESH
<< 16;
3095 /* enable receive descriptor fetching */
3096 rxdctl
|= E1000_RXDCTL_QUEUE_ENABLE
;
3097 wr32(E1000_RXDCTL(reg_idx
), rxdctl
);
3101 * igb_configure_rx - Configure receive Unit after Reset
3102 * @adapter: board private structure
3104 * Configure the Rx unit of the MAC after a reset.
3106 static void igb_configure_rx(struct igb_adapter
*adapter
)
3110 /* set UTA to appropriate mode */
3111 igb_set_uta(adapter
);
3113 /* set the correct pool for the PF default MAC address in entry 0 */
3114 igb_rar_set_qsel(adapter
, adapter
->hw
.mac
.addr
, 0,
3115 adapter
->vfs_allocated_count
);
3117 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3118 * the Base and Length of the Rx Descriptor Ring */
3119 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3120 igb_configure_rx_ring(adapter
, adapter
->rx_ring
[i
]);
3124 * igb_free_tx_resources - Free Tx Resources per Queue
3125 * @tx_ring: Tx descriptor ring for a specific queue
3127 * Free all transmit software resources
3129 void igb_free_tx_resources(struct igb_ring
*tx_ring
)
3131 igb_clean_tx_ring(tx_ring
);
3133 vfree(tx_ring
->tx_buffer_info
);
3134 tx_ring
->tx_buffer_info
= NULL
;
3136 /* if not set, then don't free */
3140 dma_free_coherent(tx_ring
->dev
, tx_ring
->size
,
3141 tx_ring
->desc
, tx_ring
->dma
);
3143 tx_ring
->desc
= NULL
;
3147 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3148 * @adapter: board private structure
3150 * Free all transmit software resources
3152 static void igb_free_all_tx_resources(struct igb_adapter
*adapter
)
3156 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
3157 igb_free_tx_resources(adapter
->tx_ring
[i
]);
3160 void igb_unmap_and_free_tx_resource(struct igb_ring
*ring
,
3161 struct igb_tx_buffer
*tx_buffer
)
3163 if (tx_buffer
->skb
) {
3164 dev_kfree_skb_any(tx_buffer
->skb
);
3166 dma_unmap_single(ring
->dev
,
3170 } else if (tx_buffer
->dma
) {
3171 dma_unmap_page(ring
->dev
,
3176 tx_buffer
->next_to_watch
= NULL
;
3177 tx_buffer
->skb
= NULL
;
3179 /* buffer_info must be completely set up in the transmit path */
3183 * igb_clean_tx_ring - Free Tx Buffers
3184 * @tx_ring: ring to be cleaned
3186 static void igb_clean_tx_ring(struct igb_ring
*tx_ring
)
3188 struct igb_tx_buffer
*buffer_info
;
3192 if (!tx_ring
->tx_buffer_info
)
3194 /* Free all the Tx ring sk_buffs */
3196 for (i
= 0; i
< tx_ring
->count
; i
++) {
3197 buffer_info
= &tx_ring
->tx_buffer_info
[i
];
3198 igb_unmap_and_free_tx_resource(tx_ring
, buffer_info
);
3201 size
= sizeof(struct igb_tx_buffer
) * tx_ring
->count
;
3202 memset(tx_ring
->tx_buffer_info
, 0, size
);
3204 /* Zero out the descriptor ring */
3205 memset(tx_ring
->desc
, 0, tx_ring
->size
);
3207 tx_ring
->next_to_use
= 0;
3208 tx_ring
->next_to_clean
= 0;
3212 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3213 * @adapter: board private structure
3215 static void igb_clean_all_tx_rings(struct igb_adapter
*adapter
)
3219 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
3220 igb_clean_tx_ring(adapter
->tx_ring
[i
]);
3224 * igb_free_rx_resources - Free Rx Resources
3225 * @rx_ring: ring to clean the resources from
3227 * Free all receive software resources
3229 void igb_free_rx_resources(struct igb_ring
*rx_ring
)
3231 igb_clean_rx_ring(rx_ring
);
3233 vfree(rx_ring
->rx_buffer_info
);
3234 rx_ring
->rx_buffer_info
= NULL
;
3236 /* if not set, then don't free */
3240 dma_free_coherent(rx_ring
->dev
, rx_ring
->size
,
3241 rx_ring
->desc
, rx_ring
->dma
);
3243 rx_ring
->desc
= NULL
;
3247 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3248 * @adapter: board private structure
3250 * Free all receive software resources
3252 static void igb_free_all_rx_resources(struct igb_adapter
*adapter
)
3256 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3257 igb_free_rx_resources(adapter
->rx_ring
[i
]);
3261 * igb_clean_rx_ring - Free Rx Buffers per Queue
3262 * @rx_ring: ring to free buffers from
3264 static void igb_clean_rx_ring(struct igb_ring
*rx_ring
)
3269 if (!rx_ring
->rx_buffer_info
)
3272 /* Free all the Rx ring sk_buffs */
3273 for (i
= 0; i
< rx_ring
->count
; i
++) {
3274 struct igb_rx_buffer
*buffer_info
= &rx_ring
->rx_buffer_info
[i
];
3275 if (buffer_info
->dma
) {
3276 dma_unmap_single(rx_ring
->dev
,
3280 buffer_info
->dma
= 0;
3283 if (buffer_info
->skb
) {
3284 dev_kfree_skb(buffer_info
->skb
);
3285 buffer_info
->skb
= NULL
;
3287 if (buffer_info
->page_dma
) {
3288 dma_unmap_page(rx_ring
->dev
,
3289 buffer_info
->page_dma
,
3292 buffer_info
->page_dma
= 0;
3294 if (buffer_info
->page
) {
3295 put_page(buffer_info
->page
);
3296 buffer_info
->page
= NULL
;
3297 buffer_info
->page_offset
= 0;
3301 size
= sizeof(struct igb_rx_buffer
) * rx_ring
->count
;
3302 memset(rx_ring
->rx_buffer_info
, 0, size
);
3304 /* Zero out the descriptor ring */
3305 memset(rx_ring
->desc
, 0, rx_ring
->size
);
3307 rx_ring
->next_to_clean
= 0;
3308 rx_ring
->next_to_use
= 0;
3312 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3313 * @adapter: board private structure
3315 static void igb_clean_all_rx_rings(struct igb_adapter
*adapter
)
3319 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3320 igb_clean_rx_ring(adapter
->rx_ring
[i
]);
3324 * igb_set_mac - Change the Ethernet Address of the NIC
3325 * @netdev: network interface device structure
3326 * @p: pointer to an address structure
3328 * Returns 0 on success, negative on failure
3330 static int igb_set_mac(struct net_device
*netdev
, void *p
)
3332 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3333 struct e1000_hw
*hw
= &adapter
->hw
;
3334 struct sockaddr
*addr
= p
;
3336 if (!is_valid_ether_addr(addr
->sa_data
))
3337 return -EADDRNOTAVAIL
;
3339 memcpy(netdev
->dev_addr
, addr
->sa_data
, netdev
->addr_len
);
3340 memcpy(hw
->mac
.addr
, addr
->sa_data
, netdev
->addr_len
);
3342 /* set the correct pool for the new PF MAC address in entry 0 */
3343 igb_rar_set_qsel(adapter
, hw
->mac
.addr
, 0,
3344 adapter
->vfs_allocated_count
);
3350 * igb_write_mc_addr_list - write multicast addresses to MTA
3351 * @netdev: network interface device structure
3353 * Writes multicast address list to the MTA hash table.
3354 * Returns: -ENOMEM on failure
3355 * 0 on no addresses written
3356 * X on writing X addresses to MTA
3358 static int igb_write_mc_addr_list(struct net_device
*netdev
)
3360 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3361 struct e1000_hw
*hw
= &adapter
->hw
;
3362 struct netdev_hw_addr
*ha
;
3366 if (netdev_mc_empty(netdev
)) {
3367 /* nothing to program, so clear mc list */
3368 igb_update_mc_addr_list(hw
, NULL
, 0);
3369 igb_restore_vf_multicasts(adapter
);
3373 mta_list
= kzalloc(netdev_mc_count(netdev
) * 6, GFP_ATOMIC
);
3377 /* The shared function expects a packed array of only addresses. */
3379 netdev_for_each_mc_addr(ha
, netdev
)
3380 memcpy(mta_list
+ (i
++ * ETH_ALEN
), ha
->addr
, ETH_ALEN
);
3382 igb_update_mc_addr_list(hw
, mta_list
, i
);
3385 return netdev_mc_count(netdev
);
3389 * igb_write_uc_addr_list - write unicast addresses to RAR table
3390 * @netdev: network interface device structure
3392 * Writes unicast address list to the RAR table.
3393 * Returns: -ENOMEM on failure/insufficient address space
3394 * 0 on no addresses written
3395 * X on writing X addresses to the RAR table
3397 static int igb_write_uc_addr_list(struct net_device
*netdev
)
3399 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3400 struct e1000_hw
*hw
= &adapter
->hw
;
3401 unsigned int vfn
= adapter
->vfs_allocated_count
;
3402 unsigned int rar_entries
= hw
->mac
.rar_entry_count
- (vfn
+ 1);
3405 /* return ENOMEM indicating insufficient memory for addresses */
3406 if (netdev_uc_count(netdev
) > rar_entries
)
3409 if (!netdev_uc_empty(netdev
) && rar_entries
) {
3410 struct netdev_hw_addr
*ha
;
3412 netdev_for_each_uc_addr(ha
, netdev
) {
3415 igb_rar_set_qsel(adapter
, ha
->addr
,
3421 /* write the addresses in reverse order to avoid write combining */
3422 for (; rar_entries
> 0 ; rar_entries
--) {
3423 wr32(E1000_RAH(rar_entries
), 0);
3424 wr32(E1000_RAL(rar_entries
), 0);
3432 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3433 * @netdev: network interface device structure
3435 * The set_rx_mode entry point is called whenever the unicast or multicast
3436 * address lists or the network interface flags are updated. This routine is
3437 * responsible for configuring the hardware for proper unicast, multicast,
3438 * promiscuous mode, and all-multi behavior.
3440 static void igb_set_rx_mode(struct net_device
*netdev
)
3442 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3443 struct e1000_hw
*hw
= &adapter
->hw
;
3444 unsigned int vfn
= adapter
->vfs_allocated_count
;
3445 u32 rctl
, vmolr
= 0;
3448 /* Check for Promiscuous and All Multicast modes */
3449 rctl
= rd32(E1000_RCTL
);
3451 /* clear the effected bits */
3452 rctl
&= ~(E1000_RCTL_UPE
| E1000_RCTL_MPE
| E1000_RCTL_VFE
);
3454 if (netdev
->flags
& IFF_PROMISC
) {
3455 rctl
|= (E1000_RCTL_UPE
| E1000_RCTL_MPE
);
3456 vmolr
|= (E1000_VMOLR_ROPE
| E1000_VMOLR_MPME
);
3458 if (netdev
->flags
& IFF_ALLMULTI
) {
3459 rctl
|= E1000_RCTL_MPE
;
3460 vmolr
|= E1000_VMOLR_MPME
;
3463 * Write addresses to the MTA, if the attempt fails
3464 * then we should just turn on promiscuous mode so
3465 * that we can at least receive multicast traffic
3467 count
= igb_write_mc_addr_list(netdev
);
3469 rctl
|= E1000_RCTL_MPE
;
3470 vmolr
|= E1000_VMOLR_MPME
;
3472 vmolr
|= E1000_VMOLR_ROMPE
;
3476 * Write addresses to available RAR registers, if there is not
3477 * sufficient space to store all the addresses then enable
3478 * unicast promiscuous mode
3480 count
= igb_write_uc_addr_list(netdev
);
3482 rctl
|= E1000_RCTL_UPE
;
3483 vmolr
|= E1000_VMOLR_ROPE
;
3485 rctl
|= E1000_RCTL_VFE
;
3487 wr32(E1000_RCTL
, rctl
);
3490 * In order to support SR-IOV and eventually VMDq it is necessary to set
3491 * the VMOLR to enable the appropriate modes. Without this workaround
3492 * we will have issues with VLAN tag stripping not being done for frames
3493 * that are only arriving because we are the default pool
3495 if (hw
->mac
.type
< e1000_82576
)
3498 vmolr
|= rd32(E1000_VMOLR(vfn
)) &
3499 ~(E1000_VMOLR_ROPE
| E1000_VMOLR_MPME
| E1000_VMOLR_ROMPE
);
3500 wr32(E1000_VMOLR(vfn
), vmolr
);
3501 igb_restore_vf_multicasts(adapter
);
3504 static void igb_check_wvbr(struct igb_adapter
*adapter
)
3506 struct e1000_hw
*hw
= &adapter
->hw
;
3509 switch (hw
->mac
.type
) {
3512 if (!(wvbr
= rd32(E1000_WVBR
)))
3519 adapter
->wvbr
|= wvbr
;
3522 #define IGB_STAGGERED_QUEUE_OFFSET 8
3524 static void igb_spoof_check(struct igb_adapter
*adapter
)
3531 for(j
= 0; j
< adapter
->vfs_allocated_count
; j
++) {
3532 if (adapter
->wvbr
& (1 << j
) ||
3533 adapter
->wvbr
& (1 << (j
+ IGB_STAGGERED_QUEUE_OFFSET
))) {
3534 dev_warn(&adapter
->pdev
->dev
,
3535 "Spoof event(s) detected on VF %d\n", j
);
3538 (1 << (j
+ IGB_STAGGERED_QUEUE_OFFSET
)));
3543 /* Need to wait a few seconds after link up to get diagnostic information from
3545 static void igb_update_phy_info(unsigned long data
)
3547 struct igb_adapter
*adapter
= (struct igb_adapter
*) data
;
3548 igb_get_phy_info(&adapter
->hw
);
3552 * igb_has_link - check shared code for link and determine up/down
3553 * @adapter: pointer to driver private info
3555 bool igb_has_link(struct igb_adapter
*adapter
)
3557 struct e1000_hw
*hw
= &adapter
->hw
;
3558 bool link_active
= false;
3561 /* get_link_status is set on LSC (link status) interrupt or
3562 * rx sequence error interrupt. get_link_status will stay
3563 * false until the e1000_check_for_link establishes link
3564 * for copper adapters ONLY
3566 switch (hw
->phy
.media_type
) {
3567 case e1000_media_type_copper
:
3568 if (hw
->mac
.get_link_status
) {
3569 ret_val
= hw
->mac
.ops
.check_for_link(hw
);
3570 link_active
= !hw
->mac
.get_link_status
;
3575 case e1000_media_type_internal_serdes
:
3576 ret_val
= hw
->mac
.ops
.check_for_link(hw
);
3577 link_active
= hw
->mac
.serdes_has_link
;
3580 case e1000_media_type_unknown
:
3587 static bool igb_thermal_sensor_event(struct e1000_hw
*hw
, u32 event
)
3590 u32 ctrl_ext
, thstat
;
3592 /* check for thermal sensor event on i350, copper only */
3593 if (hw
->mac
.type
== e1000_i350
) {
3594 thstat
= rd32(E1000_THSTAT
);
3595 ctrl_ext
= rd32(E1000_CTRL_EXT
);
3597 if ((hw
->phy
.media_type
== e1000_media_type_copper
) &&
3598 !(ctrl_ext
& E1000_CTRL_EXT_LINK_MODE_SGMII
)) {
3599 ret
= !!(thstat
& event
);
3607 * igb_watchdog - Timer Call-back
3608 * @data: pointer to adapter cast into an unsigned long
3610 static void igb_watchdog(unsigned long data
)
3612 struct igb_adapter
*adapter
= (struct igb_adapter
*)data
;
3613 /* Do the rest outside of interrupt context */
3614 schedule_work(&adapter
->watchdog_task
);
3617 static void igb_watchdog_task(struct work_struct
*work
)
3619 struct igb_adapter
*adapter
= container_of(work
,
3622 struct e1000_hw
*hw
= &adapter
->hw
;
3623 struct net_device
*netdev
= adapter
->netdev
;
3627 link
= igb_has_link(adapter
);
3629 if (!netif_carrier_ok(netdev
)) {
3631 hw
->mac
.ops
.get_speed_and_duplex(hw
,
3632 &adapter
->link_speed
,
3633 &adapter
->link_duplex
);
3635 ctrl
= rd32(E1000_CTRL
);
3636 /* Links status message must follow this format */
3637 printk(KERN_INFO
"igb: %s NIC Link is Up %d Mbps %s, "
3638 "Flow Control: %s\n",
3640 adapter
->link_speed
,
3641 adapter
->link_duplex
== FULL_DUPLEX
?
3642 "Full Duplex" : "Half Duplex",
3643 ((ctrl
& E1000_CTRL_TFCE
) &&
3644 (ctrl
& E1000_CTRL_RFCE
)) ? "RX/TX" :
3645 ((ctrl
& E1000_CTRL_RFCE
) ? "RX" :
3646 ((ctrl
& E1000_CTRL_TFCE
) ? "TX" : "None")));
3648 /* check for thermal sensor event */
3649 if (igb_thermal_sensor_event(hw
, E1000_THSTAT_LINK_THROTTLE
)) {
3650 printk(KERN_INFO
"igb: %s The network adapter "
3651 "link speed was downshifted "
3652 "because it overheated.\n",
3656 /* adjust timeout factor according to speed/duplex */
3657 adapter
->tx_timeout_factor
= 1;
3658 switch (adapter
->link_speed
) {
3660 adapter
->tx_timeout_factor
= 14;
3663 /* maybe add some timeout factor ? */
3667 netif_carrier_on(netdev
);
3669 igb_ping_all_vfs(adapter
);
3670 igb_check_vf_rate_limit(adapter
);
3672 /* link state has changed, schedule phy info update */
3673 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3674 mod_timer(&adapter
->phy_info_timer
,
3675 round_jiffies(jiffies
+ 2 * HZ
));
3678 if (netif_carrier_ok(netdev
)) {
3679 adapter
->link_speed
= 0;
3680 adapter
->link_duplex
= 0;
3682 /* check for thermal sensor event */
3683 if (igb_thermal_sensor_event(hw
, E1000_THSTAT_PWR_DOWN
)) {
3684 printk(KERN_ERR
"igb: %s The network adapter "
3685 "was stopped because it "
3690 /* Links status message must follow this format */
3691 printk(KERN_INFO
"igb: %s NIC Link is Down\n",
3693 netif_carrier_off(netdev
);
3695 igb_ping_all_vfs(adapter
);
3697 /* link state has changed, schedule phy info update */
3698 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3699 mod_timer(&adapter
->phy_info_timer
,
3700 round_jiffies(jiffies
+ 2 * HZ
));
3704 spin_lock(&adapter
->stats64_lock
);
3705 igb_update_stats(adapter
, &adapter
->stats64
);
3706 spin_unlock(&adapter
->stats64_lock
);
3708 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
3709 struct igb_ring
*tx_ring
= adapter
->tx_ring
[i
];
3710 if (!netif_carrier_ok(netdev
)) {
3711 /* We've lost link, so the controller stops DMA,
3712 * but we've got queued Tx work that's never going
3713 * to get done, so reset controller to flush Tx.
3714 * (Do the reset outside of interrupt context). */
3715 if (igb_desc_unused(tx_ring
) + 1 < tx_ring
->count
) {
3716 adapter
->tx_timeout_count
++;
3717 schedule_work(&adapter
->reset_task
);
3718 /* return immediately since reset is imminent */
3723 /* Force detection of hung controller every watchdog period */
3724 set_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
);
3727 /* Cause software interrupt to ensure rx ring is cleaned */
3728 if (adapter
->msix_entries
) {
3730 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
3731 eics
|= adapter
->q_vector
[i
]->eims_value
;
3732 wr32(E1000_EICS
, eics
);
3734 wr32(E1000_ICS
, E1000_ICS_RXDMT0
);
3737 igb_spoof_check(adapter
);
3739 /* Reset the timer */
3740 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3741 mod_timer(&adapter
->watchdog_timer
,
3742 round_jiffies(jiffies
+ 2 * HZ
));
3745 enum latency_range
{
3749 latency_invalid
= 255
3753 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3755 * Stores a new ITR value based strictly on packet size. This
3756 * algorithm is less sophisticated than that used in igb_update_itr,
3757 * due to the difficulty of synchronizing statistics across multiple
3758 * receive rings. The divisors and thresholds used by this function
3759 * were determined based on theoretical maximum wire speed and testing
3760 * data, in order to minimize response time while increasing bulk
3762 * This functionality is controlled by the InterruptThrottleRate module
3763 * parameter (see igb_param.c)
3764 * NOTE: This function is called only when operating in a multiqueue
3765 * receive environment.
3766 * @q_vector: pointer to q_vector
3768 static void igb_update_ring_itr(struct igb_q_vector
*q_vector
)
3770 int new_val
= q_vector
->itr_val
;
3771 int avg_wire_size
= 0;
3772 struct igb_adapter
*adapter
= q_vector
->adapter
;
3773 unsigned int packets
;
3775 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3776 * ints/sec - ITR timer value of 120 ticks.
3778 if (adapter
->link_speed
!= SPEED_1000
) {
3779 new_val
= IGB_4K_ITR
;
3783 packets
= q_vector
->rx
.total_packets
;
3785 avg_wire_size
= q_vector
->rx
.total_bytes
/ packets
;
3787 packets
= q_vector
->tx
.total_packets
;
3789 avg_wire_size
= max_t(u32
, avg_wire_size
,
3790 q_vector
->tx
.total_bytes
/ packets
);
3792 /* if avg_wire_size isn't set no work was done */
3796 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3797 avg_wire_size
+= 24;
3799 /* Don't starve jumbo frames */
3800 avg_wire_size
= min(avg_wire_size
, 3000);
3802 /* Give a little boost to mid-size frames */
3803 if ((avg_wire_size
> 300) && (avg_wire_size
< 1200))
3804 new_val
= avg_wire_size
/ 3;
3806 new_val
= avg_wire_size
/ 2;
3808 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3809 if (new_val
< IGB_20K_ITR
&&
3810 ((q_vector
->rx
.ring
&& adapter
->rx_itr_setting
== 3) ||
3811 (!q_vector
->rx
.ring
&& adapter
->tx_itr_setting
== 3)))
3812 new_val
= IGB_20K_ITR
;
3815 if (new_val
!= q_vector
->itr_val
) {
3816 q_vector
->itr_val
= new_val
;
3817 q_vector
->set_itr
= 1;
3820 q_vector
->rx
.total_bytes
= 0;
3821 q_vector
->rx
.total_packets
= 0;
3822 q_vector
->tx
.total_bytes
= 0;
3823 q_vector
->tx
.total_packets
= 0;
3827 * igb_update_itr - update the dynamic ITR value based on statistics
3828 * Stores a new ITR value based on packets and byte
3829 * counts during the last interrupt. The advantage of per interrupt
3830 * computation is faster updates and more accurate ITR for the current
3831 * traffic pattern. Constants in this function were computed
3832 * based on theoretical maximum wire speed and thresholds were set based
3833 * on testing data as well as attempting to minimize response time
3834 * while increasing bulk throughput.
3835 * this functionality is controlled by the InterruptThrottleRate module
3836 * parameter (see igb_param.c)
3837 * NOTE: These calculations are only valid when operating in a single-
3838 * queue environment.
3839 * @q_vector: pointer to q_vector
3840 * @ring_container: ring info to update the itr for
3842 static void igb_update_itr(struct igb_q_vector
*q_vector
,
3843 struct igb_ring_container
*ring_container
)
3845 unsigned int packets
= ring_container
->total_packets
;
3846 unsigned int bytes
= ring_container
->total_bytes
;
3847 u8 itrval
= ring_container
->itr
;
3849 /* no packets, exit with status unchanged */
3854 case lowest_latency
:
3855 /* handle TSO and jumbo frames */
3856 if (bytes
/packets
> 8000)
3857 itrval
= bulk_latency
;
3858 else if ((packets
< 5) && (bytes
> 512))
3859 itrval
= low_latency
;
3861 case low_latency
: /* 50 usec aka 20000 ints/s */
3862 if (bytes
> 10000) {
3863 /* this if handles the TSO accounting */
3864 if (bytes
/packets
> 8000) {
3865 itrval
= bulk_latency
;
3866 } else if ((packets
< 10) || ((bytes
/packets
) > 1200)) {
3867 itrval
= bulk_latency
;
3868 } else if ((packets
> 35)) {
3869 itrval
= lowest_latency
;
3871 } else if (bytes
/packets
> 2000) {
3872 itrval
= bulk_latency
;
3873 } else if (packets
<= 2 && bytes
< 512) {
3874 itrval
= lowest_latency
;
3877 case bulk_latency
: /* 250 usec aka 4000 ints/s */
3878 if (bytes
> 25000) {
3880 itrval
= low_latency
;
3881 } else if (bytes
< 1500) {
3882 itrval
= low_latency
;
3887 /* clear work counters since we have the values we need */
3888 ring_container
->total_bytes
= 0;
3889 ring_container
->total_packets
= 0;
3891 /* write updated itr to ring container */
3892 ring_container
->itr
= itrval
;
3895 static void igb_set_itr(struct igb_q_vector
*q_vector
)
3897 struct igb_adapter
*adapter
= q_vector
->adapter
;
3898 u32 new_itr
= q_vector
->itr_val
;
3901 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3902 if (adapter
->link_speed
!= SPEED_1000
) {
3904 new_itr
= IGB_4K_ITR
;
3908 igb_update_itr(q_vector
, &q_vector
->tx
);
3909 igb_update_itr(q_vector
, &q_vector
->rx
);
3911 current_itr
= max(q_vector
->rx
.itr
, q_vector
->tx
.itr
);
3913 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3914 if (current_itr
== lowest_latency
&&
3915 ((q_vector
->rx
.ring
&& adapter
->rx_itr_setting
== 3) ||
3916 (!q_vector
->rx
.ring
&& adapter
->tx_itr_setting
== 3)))
3917 current_itr
= low_latency
;
3919 switch (current_itr
) {
3920 /* counts and packets in update_itr are dependent on these numbers */
3921 case lowest_latency
:
3922 new_itr
= IGB_70K_ITR
; /* 70,000 ints/sec */
3925 new_itr
= IGB_20K_ITR
; /* 20,000 ints/sec */
3928 new_itr
= IGB_4K_ITR
; /* 4,000 ints/sec */
3935 if (new_itr
!= q_vector
->itr_val
) {
3936 /* this attempts to bias the interrupt rate towards Bulk
3937 * by adding intermediate steps when interrupt rate is
3939 new_itr
= new_itr
> q_vector
->itr_val
?
3940 max((new_itr
* q_vector
->itr_val
) /
3941 (new_itr
+ (q_vector
->itr_val
>> 2)),
3944 /* Don't write the value here; it resets the adapter's
3945 * internal timer, and causes us to delay far longer than
3946 * we should between interrupts. Instead, we write the ITR
3947 * value at the beginning of the next interrupt so the timing
3948 * ends up being correct.
3950 q_vector
->itr_val
= new_itr
;
3951 q_vector
->set_itr
= 1;
3955 void igb_tx_ctxtdesc(struct igb_ring
*tx_ring
, u32 vlan_macip_lens
,
3956 u32 type_tucmd
, u32 mss_l4len_idx
)
3958 struct e1000_adv_tx_context_desc
*context_desc
;
3959 u16 i
= tx_ring
->next_to_use
;
3961 context_desc
= IGB_TX_CTXTDESC(tx_ring
, i
);
3964 tx_ring
->next_to_use
= (i
< tx_ring
->count
) ? i
: 0;
3966 /* set bits to identify this as an advanced context descriptor */
3967 type_tucmd
|= E1000_TXD_CMD_DEXT
| E1000_ADVTXD_DTYP_CTXT
;
3969 /* For 82575, context index must be unique per ring. */
3970 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX
, &tx_ring
->flags
))
3971 mss_l4len_idx
|= tx_ring
->reg_idx
<< 4;
3973 context_desc
->vlan_macip_lens
= cpu_to_le32(vlan_macip_lens
);
3974 context_desc
->seqnum_seed
= 0;
3975 context_desc
->type_tucmd_mlhl
= cpu_to_le32(type_tucmd
);
3976 context_desc
->mss_l4len_idx
= cpu_to_le32(mss_l4len_idx
);
3979 static int igb_tso(struct igb_ring
*tx_ring
,
3980 struct igb_tx_buffer
*first
,
3983 struct sk_buff
*skb
= first
->skb
;
3984 u32 vlan_macip_lens
, type_tucmd
;
3985 u32 mss_l4len_idx
, l4len
;
3987 if (!skb_is_gso(skb
))
3990 if (skb_header_cloned(skb
)) {
3991 int err
= pskb_expand_head(skb
, 0, 0, GFP_ATOMIC
);
3996 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3997 type_tucmd
= E1000_ADVTXD_TUCMD_L4T_TCP
;
3999 if (first
->protocol
== __constant_htons(ETH_P_IP
)) {
4000 struct iphdr
*iph
= ip_hdr(skb
);
4003 tcp_hdr(skb
)->check
= ~csum_tcpudp_magic(iph
->saddr
,
4007 type_tucmd
|= E1000_ADVTXD_TUCMD_IPV4
;
4008 first
->tx_flags
|= IGB_TX_FLAGS_TSO
|
4011 } else if (skb_is_gso_v6(skb
)) {
4012 ipv6_hdr(skb
)->payload_len
= 0;
4013 tcp_hdr(skb
)->check
= ~csum_ipv6_magic(&ipv6_hdr(skb
)->saddr
,
4014 &ipv6_hdr(skb
)->daddr
,
4016 first
->tx_flags
|= IGB_TX_FLAGS_TSO
|
4020 /* compute header lengths */
4021 l4len
= tcp_hdrlen(skb
);
4022 *hdr_len
= skb_transport_offset(skb
) + l4len
;
4024 /* update gso size and bytecount with header size */
4025 first
->gso_segs
= skb_shinfo(skb
)->gso_segs
;
4026 first
->bytecount
+= (first
->gso_segs
- 1) * *hdr_len
;
4029 mss_l4len_idx
= l4len
<< E1000_ADVTXD_L4LEN_SHIFT
;
4030 mss_l4len_idx
|= skb_shinfo(skb
)->gso_size
<< E1000_ADVTXD_MSS_SHIFT
;
4032 /* VLAN MACLEN IPLEN */
4033 vlan_macip_lens
= skb_network_header_len(skb
);
4034 vlan_macip_lens
|= skb_network_offset(skb
) << E1000_ADVTXD_MACLEN_SHIFT
;
4035 vlan_macip_lens
|= first
->tx_flags
& IGB_TX_FLAGS_VLAN_MASK
;
4037 igb_tx_ctxtdesc(tx_ring
, vlan_macip_lens
, type_tucmd
, mss_l4len_idx
);
4042 static void igb_tx_csum(struct igb_ring
*tx_ring
, struct igb_tx_buffer
*first
)
4044 struct sk_buff
*skb
= first
->skb
;
4045 u32 vlan_macip_lens
= 0;
4046 u32 mss_l4len_idx
= 0;
4049 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
4050 if (!(first
->tx_flags
& IGB_TX_FLAGS_VLAN
))
4054 switch (first
->protocol
) {
4055 case __constant_htons(ETH_P_IP
):
4056 vlan_macip_lens
|= skb_network_header_len(skb
);
4057 type_tucmd
|= E1000_ADVTXD_TUCMD_IPV4
;
4058 l4_hdr
= ip_hdr(skb
)->protocol
;
4060 case __constant_htons(ETH_P_IPV6
):
4061 vlan_macip_lens
|= skb_network_header_len(skb
);
4062 l4_hdr
= ipv6_hdr(skb
)->nexthdr
;
4065 if (unlikely(net_ratelimit())) {
4066 dev_warn(tx_ring
->dev
,
4067 "partial checksum but proto=%x!\n",
4075 type_tucmd
|= E1000_ADVTXD_TUCMD_L4T_TCP
;
4076 mss_l4len_idx
= tcp_hdrlen(skb
) <<
4077 E1000_ADVTXD_L4LEN_SHIFT
;
4080 type_tucmd
|= E1000_ADVTXD_TUCMD_L4T_SCTP
;
4081 mss_l4len_idx
= sizeof(struct sctphdr
) <<
4082 E1000_ADVTXD_L4LEN_SHIFT
;
4085 mss_l4len_idx
= sizeof(struct udphdr
) <<
4086 E1000_ADVTXD_L4LEN_SHIFT
;
4089 if (unlikely(net_ratelimit())) {
4090 dev_warn(tx_ring
->dev
,
4091 "partial checksum but l4 proto=%x!\n",
4097 /* update TX checksum flag */
4098 first
->tx_flags
|= IGB_TX_FLAGS_CSUM
;
4101 vlan_macip_lens
|= skb_network_offset(skb
) << E1000_ADVTXD_MACLEN_SHIFT
;
4102 vlan_macip_lens
|= first
->tx_flags
& IGB_TX_FLAGS_VLAN_MASK
;
4104 igb_tx_ctxtdesc(tx_ring
, vlan_macip_lens
, type_tucmd
, mss_l4len_idx
);
4107 static __le32
igb_tx_cmd_type(u32 tx_flags
)
4109 /* set type for advanced descriptor with frame checksum insertion */
4110 __le32 cmd_type
= cpu_to_le32(E1000_ADVTXD_DTYP_DATA
|
4111 E1000_ADVTXD_DCMD_IFCS
|
4112 E1000_ADVTXD_DCMD_DEXT
);
4114 /* set HW vlan bit if vlan is present */
4115 if (tx_flags
& IGB_TX_FLAGS_VLAN
)
4116 cmd_type
|= cpu_to_le32(E1000_ADVTXD_DCMD_VLE
);
4118 /* set timestamp bit if present */
4119 if (tx_flags
& IGB_TX_FLAGS_TSTAMP
)
4120 cmd_type
|= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP
);
4122 /* set segmentation bits for TSO */
4123 if (tx_flags
& IGB_TX_FLAGS_TSO
)
4124 cmd_type
|= cpu_to_le32(E1000_ADVTXD_DCMD_TSE
);
4129 static void igb_tx_olinfo_status(struct igb_ring
*tx_ring
,
4130 union e1000_adv_tx_desc
*tx_desc
,
4131 u32 tx_flags
, unsigned int paylen
)
4133 u32 olinfo_status
= paylen
<< E1000_ADVTXD_PAYLEN_SHIFT
;
4135 /* 82575 requires a unique index per ring if any offload is enabled */
4136 if ((tx_flags
& (IGB_TX_FLAGS_CSUM
| IGB_TX_FLAGS_VLAN
)) &&
4137 test_bit(IGB_RING_FLAG_TX_CTX_IDX
, &tx_ring
->flags
))
4138 olinfo_status
|= tx_ring
->reg_idx
<< 4;
4140 /* insert L4 checksum */
4141 if (tx_flags
& IGB_TX_FLAGS_CSUM
) {
4142 olinfo_status
|= E1000_TXD_POPTS_TXSM
<< 8;
4144 /* insert IPv4 checksum */
4145 if (tx_flags
& IGB_TX_FLAGS_IPV4
)
4146 olinfo_status
|= E1000_TXD_POPTS_IXSM
<< 8;
4149 tx_desc
->read
.olinfo_status
= cpu_to_le32(olinfo_status
);
4153 * The largest size we can write to the descriptor is 65535. In order to
4154 * maintain a power of two alignment we have to limit ourselves to 32K.
4156 #define IGB_MAX_TXD_PWR 15
4157 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4159 static void igb_tx_map(struct igb_ring
*tx_ring
,
4160 struct igb_tx_buffer
*first
,
4163 struct sk_buff
*skb
= first
->skb
;
4164 struct igb_tx_buffer
*tx_buffer_info
;
4165 union e1000_adv_tx_desc
*tx_desc
;
4167 struct skb_frag_struct
*frag
= &skb_shinfo(skb
)->frags
[0];
4168 unsigned int data_len
= skb
->data_len
;
4169 unsigned int size
= skb_headlen(skb
);
4170 unsigned int paylen
= skb
->len
- hdr_len
;
4172 u32 tx_flags
= first
->tx_flags
;
4173 u16 i
= tx_ring
->next_to_use
;
4175 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
4177 igb_tx_olinfo_status(tx_ring
, tx_desc
, tx_flags
, paylen
);
4178 cmd_type
= igb_tx_cmd_type(tx_flags
);
4180 dma
= dma_map_single(tx_ring
->dev
, skb
->data
, size
, DMA_TO_DEVICE
);
4181 if (dma_mapping_error(tx_ring
->dev
, dma
))
4184 /* record length, and DMA address */
4185 first
->length
= size
;
4187 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4190 while (unlikely(size
> IGB_MAX_DATA_PER_TXD
)) {
4191 tx_desc
->read
.cmd_type_len
=
4192 cmd_type
| cpu_to_le32(IGB_MAX_DATA_PER_TXD
);
4196 if (i
== tx_ring
->count
) {
4197 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
4201 dma
+= IGB_MAX_DATA_PER_TXD
;
4202 size
-= IGB_MAX_DATA_PER_TXD
;
4204 tx_desc
->read
.olinfo_status
= 0;
4205 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4208 if (likely(!data_len
))
4211 tx_desc
->read
.cmd_type_len
= cmd_type
| cpu_to_le32(size
);
4215 if (i
== tx_ring
->count
) {
4216 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
4220 size
= skb_frag_size(frag
);
4223 dma
= skb_frag_dma_map(tx_ring
->dev
, frag
, 0,
4224 size
, DMA_TO_DEVICE
);
4225 if (dma_mapping_error(tx_ring
->dev
, dma
))
4228 tx_buffer_info
= &tx_ring
->tx_buffer_info
[i
];
4229 tx_buffer_info
->length
= size
;
4230 tx_buffer_info
->dma
= dma
;
4232 tx_desc
->read
.olinfo_status
= 0;
4233 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4238 /* write last descriptor with RS and EOP bits */
4239 cmd_type
|= cpu_to_le32(size
) | cpu_to_le32(IGB_TXD_DCMD
);
4240 tx_desc
->read
.cmd_type_len
= cmd_type
;
4242 /* set the timestamp */
4243 first
->time_stamp
= jiffies
;
4246 * Force memory writes to complete before letting h/w know there
4247 * are new descriptors to fetch. (Only applicable for weak-ordered
4248 * memory model archs, such as IA-64).
4250 * We also need this memory barrier to make certain all of the
4251 * status bits have been updated before next_to_watch is written.
4255 /* set next_to_watch value indicating a packet is present */
4256 first
->next_to_watch
= tx_desc
;
4259 if (i
== tx_ring
->count
)
4262 tx_ring
->next_to_use
= i
;
4264 writel(i
, tx_ring
->tail
);
4266 /* we need this if more than one processor can write to our tail
4267 * at a time, it synchronizes IO on IA64/Altix systems */
4273 dev_err(tx_ring
->dev
, "TX DMA map failed\n");
4275 /* clear dma mappings for failed tx_buffer_info map */
4277 tx_buffer_info
= &tx_ring
->tx_buffer_info
[i
];
4278 igb_unmap_and_free_tx_resource(tx_ring
, tx_buffer_info
);
4279 if (tx_buffer_info
== first
)
4286 tx_ring
->next_to_use
= i
;
4289 static int __igb_maybe_stop_tx(struct igb_ring
*tx_ring
, const u16 size
)
4291 struct net_device
*netdev
= tx_ring
->netdev
;
4293 netif_stop_subqueue(netdev
, tx_ring
->queue_index
);
4295 /* Herbert's original patch had:
4296 * smp_mb__after_netif_stop_queue();
4297 * but since that doesn't exist yet, just open code it. */
4300 /* We need to check again in a case another CPU has just
4301 * made room available. */
4302 if (igb_desc_unused(tx_ring
) < size
)
4306 netif_wake_subqueue(netdev
, tx_ring
->queue_index
);
4308 u64_stats_update_begin(&tx_ring
->tx_syncp2
);
4309 tx_ring
->tx_stats
.restart_queue2
++;
4310 u64_stats_update_end(&tx_ring
->tx_syncp2
);
4315 static inline int igb_maybe_stop_tx(struct igb_ring
*tx_ring
, const u16 size
)
4317 if (igb_desc_unused(tx_ring
) >= size
)
4319 return __igb_maybe_stop_tx(tx_ring
, size
);
4322 netdev_tx_t
igb_xmit_frame_ring(struct sk_buff
*skb
,
4323 struct igb_ring
*tx_ring
)
4325 struct igb_tx_buffer
*first
;
4328 __be16 protocol
= vlan_get_protocol(skb
);
4331 /* need: 1 descriptor per page,
4332 * + 2 desc gap to keep tail from touching head,
4333 * + 1 desc for skb->data,
4334 * + 1 desc for context descriptor,
4335 * otherwise try next time */
4336 if (igb_maybe_stop_tx(tx_ring
, skb_shinfo(skb
)->nr_frags
+ 4)) {
4337 /* this is a hard error */
4338 return NETDEV_TX_BUSY
;
4341 /* record the location of the first descriptor for this packet */
4342 first
= &tx_ring
->tx_buffer_info
[tx_ring
->next_to_use
];
4344 first
->bytecount
= skb
->len
;
4345 first
->gso_segs
= 1;
4347 if (unlikely(skb_shinfo(skb
)->tx_flags
& SKBTX_HW_TSTAMP
)) {
4348 skb_shinfo(skb
)->tx_flags
|= SKBTX_IN_PROGRESS
;
4349 tx_flags
|= IGB_TX_FLAGS_TSTAMP
;
4352 if (vlan_tx_tag_present(skb
)) {
4353 tx_flags
|= IGB_TX_FLAGS_VLAN
;
4354 tx_flags
|= (vlan_tx_tag_get(skb
) << IGB_TX_FLAGS_VLAN_SHIFT
);
4357 /* record initial flags and protocol */
4358 first
->tx_flags
= tx_flags
;
4359 first
->protocol
= protocol
;
4361 tso
= igb_tso(tx_ring
, first
, &hdr_len
);
4365 igb_tx_csum(tx_ring
, first
);
4367 igb_tx_map(tx_ring
, first
, hdr_len
);
4369 /* Make sure there is space in the ring for the next send. */
4370 igb_maybe_stop_tx(tx_ring
, MAX_SKB_FRAGS
+ 4);
4372 return NETDEV_TX_OK
;
4375 igb_unmap_and_free_tx_resource(tx_ring
, first
);
4377 return NETDEV_TX_OK
;
4380 static inline struct igb_ring
*igb_tx_queue_mapping(struct igb_adapter
*adapter
,
4381 struct sk_buff
*skb
)
4383 unsigned int r_idx
= skb
->queue_mapping
;
4385 if (r_idx
>= adapter
->num_tx_queues
)
4386 r_idx
= r_idx
% adapter
->num_tx_queues
;
4388 return adapter
->tx_ring
[r_idx
];
4391 static netdev_tx_t
igb_xmit_frame(struct sk_buff
*skb
,
4392 struct net_device
*netdev
)
4394 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4396 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
4397 dev_kfree_skb_any(skb
);
4398 return NETDEV_TX_OK
;
4401 if (skb
->len
<= 0) {
4402 dev_kfree_skb_any(skb
);
4403 return NETDEV_TX_OK
;
4407 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4408 * in order to meet this minimum size requirement.
4410 if (skb
->len
< 17) {
4411 if (skb_padto(skb
, 17))
4412 return NETDEV_TX_OK
;
4416 return igb_xmit_frame_ring(skb
, igb_tx_queue_mapping(adapter
, skb
));
4420 * igb_tx_timeout - Respond to a Tx Hang
4421 * @netdev: network interface device structure
4423 static void igb_tx_timeout(struct net_device
*netdev
)
4425 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4426 struct e1000_hw
*hw
= &adapter
->hw
;
4428 /* Do the reset outside of interrupt context */
4429 adapter
->tx_timeout_count
++;
4431 if (hw
->mac
.type
>= e1000_82580
)
4432 hw
->dev_spec
._82575
.global_device_reset
= true;
4434 schedule_work(&adapter
->reset_task
);
4436 (adapter
->eims_enable_mask
& ~adapter
->eims_other
));
4439 static void igb_reset_task(struct work_struct
*work
)
4441 struct igb_adapter
*adapter
;
4442 adapter
= container_of(work
, struct igb_adapter
, reset_task
);
4445 netdev_err(adapter
->netdev
, "Reset adapter\n");
4446 igb_reinit_locked(adapter
);
4450 * igb_get_stats64 - Get System Network Statistics
4451 * @netdev: network interface device structure
4452 * @stats: rtnl_link_stats64 pointer
4455 static struct rtnl_link_stats64
*igb_get_stats64(struct net_device
*netdev
,
4456 struct rtnl_link_stats64
*stats
)
4458 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4460 spin_lock(&adapter
->stats64_lock
);
4461 igb_update_stats(adapter
, &adapter
->stats64
);
4462 memcpy(stats
, &adapter
->stats64
, sizeof(*stats
));
4463 spin_unlock(&adapter
->stats64_lock
);
4469 * igb_change_mtu - Change the Maximum Transfer Unit
4470 * @netdev: network interface device structure
4471 * @new_mtu: new value for maximum frame size
4473 * Returns 0 on success, negative on failure
4475 static int igb_change_mtu(struct net_device
*netdev
, int new_mtu
)
4477 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4478 struct pci_dev
*pdev
= adapter
->pdev
;
4479 int max_frame
= new_mtu
+ ETH_HLEN
+ ETH_FCS_LEN
+ VLAN_HLEN
;
4481 if ((new_mtu
< 68) || (max_frame
> MAX_JUMBO_FRAME_SIZE
)) {
4482 dev_err(&pdev
->dev
, "Invalid MTU setting\n");
4486 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4487 if (max_frame
> MAX_STD_JUMBO_FRAME_SIZE
) {
4488 dev_err(&pdev
->dev
, "MTU > 9216 not supported.\n");
4492 while (test_and_set_bit(__IGB_RESETTING
, &adapter
->state
))
4495 /* igb_down has a dependency on max_frame_size */
4496 adapter
->max_frame_size
= max_frame
;
4498 if (netif_running(netdev
))
4501 dev_info(&pdev
->dev
, "changing MTU from %d to %d\n",
4502 netdev
->mtu
, new_mtu
);
4503 netdev
->mtu
= new_mtu
;
4505 if (netif_running(netdev
))
4510 clear_bit(__IGB_RESETTING
, &adapter
->state
);
4516 * igb_update_stats - Update the board statistics counters
4517 * @adapter: board private structure
4520 void igb_update_stats(struct igb_adapter
*adapter
,
4521 struct rtnl_link_stats64
*net_stats
)
4523 struct e1000_hw
*hw
= &adapter
->hw
;
4524 struct pci_dev
*pdev
= adapter
->pdev
;
4530 u64 _bytes
, _packets
;
4532 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4535 * Prevent stats update while adapter is being reset, or if the pci
4536 * connection is down.
4538 if (adapter
->link_speed
== 0)
4540 if (pci_channel_offline(pdev
))
4545 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
4546 u32 rqdpc_tmp
= rd32(E1000_RQDPC(i
)) & 0x0FFF;
4547 struct igb_ring
*ring
= adapter
->rx_ring
[i
];
4549 ring
->rx_stats
.drops
+= rqdpc_tmp
;
4550 net_stats
->rx_fifo_errors
+= rqdpc_tmp
;
4553 start
= u64_stats_fetch_begin_bh(&ring
->rx_syncp
);
4554 _bytes
= ring
->rx_stats
.bytes
;
4555 _packets
= ring
->rx_stats
.packets
;
4556 } while (u64_stats_fetch_retry_bh(&ring
->rx_syncp
, start
));
4558 packets
+= _packets
;
4561 net_stats
->rx_bytes
= bytes
;
4562 net_stats
->rx_packets
= packets
;
4566 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
4567 struct igb_ring
*ring
= adapter
->tx_ring
[i
];
4569 start
= u64_stats_fetch_begin_bh(&ring
->tx_syncp
);
4570 _bytes
= ring
->tx_stats
.bytes
;
4571 _packets
= ring
->tx_stats
.packets
;
4572 } while (u64_stats_fetch_retry_bh(&ring
->tx_syncp
, start
));
4574 packets
+= _packets
;
4576 net_stats
->tx_bytes
= bytes
;
4577 net_stats
->tx_packets
= packets
;
4579 /* read stats registers */
4580 adapter
->stats
.crcerrs
+= rd32(E1000_CRCERRS
);
4581 adapter
->stats
.gprc
+= rd32(E1000_GPRC
);
4582 adapter
->stats
.gorc
+= rd32(E1000_GORCL
);
4583 rd32(E1000_GORCH
); /* clear GORCL */
4584 adapter
->stats
.bprc
+= rd32(E1000_BPRC
);
4585 adapter
->stats
.mprc
+= rd32(E1000_MPRC
);
4586 adapter
->stats
.roc
+= rd32(E1000_ROC
);
4588 adapter
->stats
.prc64
+= rd32(E1000_PRC64
);
4589 adapter
->stats
.prc127
+= rd32(E1000_PRC127
);
4590 adapter
->stats
.prc255
+= rd32(E1000_PRC255
);
4591 adapter
->stats
.prc511
+= rd32(E1000_PRC511
);
4592 adapter
->stats
.prc1023
+= rd32(E1000_PRC1023
);
4593 adapter
->stats
.prc1522
+= rd32(E1000_PRC1522
);
4594 adapter
->stats
.symerrs
+= rd32(E1000_SYMERRS
);
4595 adapter
->stats
.sec
+= rd32(E1000_SEC
);
4597 mpc
= rd32(E1000_MPC
);
4598 adapter
->stats
.mpc
+= mpc
;
4599 net_stats
->rx_fifo_errors
+= mpc
;
4600 adapter
->stats
.scc
+= rd32(E1000_SCC
);
4601 adapter
->stats
.ecol
+= rd32(E1000_ECOL
);
4602 adapter
->stats
.mcc
+= rd32(E1000_MCC
);
4603 adapter
->stats
.latecol
+= rd32(E1000_LATECOL
);
4604 adapter
->stats
.dc
+= rd32(E1000_DC
);
4605 adapter
->stats
.rlec
+= rd32(E1000_RLEC
);
4606 adapter
->stats
.xonrxc
+= rd32(E1000_XONRXC
);
4607 adapter
->stats
.xontxc
+= rd32(E1000_XONTXC
);
4608 adapter
->stats
.xoffrxc
+= rd32(E1000_XOFFRXC
);
4609 adapter
->stats
.xofftxc
+= rd32(E1000_XOFFTXC
);
4610 adapter
->stats
.fcruc
+= rd32(E1000_FCRUC
);
4611 adapter
->stats
.gptc
+= rd32(E1000_GPTC
);
4612 adapter
->stats
.gotc
+= rd32(E1000_GOTCL
);
4613 rd32(E1000_GOTCH
); /* clear GOTCL */
4614 adapter
->stats
.rnbc
+= rd32(E1000_RNBC
);
4615 adapter
->stats
.ruc
+= rd32(E1000_RUC
);
4616 adapter
->stats
.rfc
+= rd32(E1000_RFC
);
4617 adapter
->stats
.rjc
+= rd32(E1000_RJC
);
4618 adapter
->stats
.tor
+= rd32(E1000_TORH
);
4619 adapter
->stats
.tot
+= rd32(E1000_TOTH
);
4620 adapter
->stats
.tpr
+= rd32(E1000_TPR
);
4622 adapter
->stats
.ptc64
+= rd32(E1000_PTC64
);
4623 adapter
->stats
.ptc127
+= rd32(E1000_PTC127
);
4624 adapter
->stats
.ptc255
+= rd32(E1000_PTC255
);
4625 adapter
->stats
.ptc511
+= rd32(E1000_PTC511
);
4626 adapter
->stats
.ptc1023
+= rd32(E1000_PTC1023
);
4627 adapter
->stats
.ptc1522
+= rd32(E1000_PTC1522
);
4629 adapter
->stats
.mptc
+= rd32(E1000_MPTC
);
4630 adapter
->stats
.bptc
+= rd32(E1000_BPTC
);
4632 adapter
->stats
.tpt
+= rd32(E1000_TPT
);
4633 adapter
->stats
.colc
+= rd32(E1000_COLC
);
4635 adapter
->stats
.algnerrc
+= rd32(E1000_ALGNERRC
);
4636 /* read internal phy specific stats */
4637 reg
= rd32(E1000_CTRL_EXT
);
4638 if (!(reg
& E1000_CTRL_EXT_LINK_MODE_MASK
)) {
4639 adapter
->stats
.rxerrc
+= rd32(E1000_RXERRC
);
4640 adapter
->stats
.tncrs
+= rd32(E1000_TNCRS
);
4643 adapter
->stats
.tsctc
+= rd32(E1000_TSCTC
);
4644 adapter
->stats
.tsctfc
+= rd32(E1000_TSCTFC
);
4646 adapter
->stats
.iac
+= rd32(E1000_IAC
);
4647 adapter
->stats
.icrxoc
+= rd32(E1000_ICRXOC
);
4648 adapter
->stats
.icrxptc
+= rd32(E1000_ICRXPTC
);
4649 adapter
->stats
.icrxatc
+= rd32(E1000_ICRXATC
);
4650 adapter
->stats
.ictxptc
+= rd32(E1000_ICTXPTC
);
4651 adapter
->stats
.ictxatc
+= rd32(E1000_ICTXATC
);
4652 adapter
->stats
.ictxqec
+= rd32(E1000_ICTXQEC
);
4653 adapter
->stats
.ictxqmtc
+= rd32(E1000_ICTXQMTC
);
4654 adapter
->stats
.icrxdmtc
+= rd32(E1000_ICRXDMTC
);
4656 /* Fill out the OS statistics structure */
4657 net_stats
->multicast
= adapter
->stats
.mprc
;
4658 net_stats
->collisions
= adapter
->stats
.colc
;
4662 /* RLEC on some newer hardware can be incorrect so build
4663 * our own version based on RUC and ROC */
4664 net_stats
->rx_errors
= adapter
->stats
.rxerrc
+
4665 adapter
->stats
.crcerrs
+ adapter
->stats
.algnerrc
+
4666 adapter
->stats
.ruc
+ adapter
->stats
.roc
+
4667 adapter
->stats
.cexterr
;
4668 net_stats
->rx_length_errors
= adapter
->stats
.ruc
+
4670 net_stats
->rx_crc_errors
= adapter
->stats
.crcerrs
;
4671 net_stats
->rx_frame_errors
= adapter
->stats
.algnerrc
;
4672 net_stats
->rx_missed_errors
= adapter
->stats
.mpc
;
4675 net_stats
->tx_errors
= adapter
->stats
.ecol
+
4676 adapter
->stats
.latecol
;
4677 net_stats
->tx_aborted_errors
= adapter
->stats
.ecol
;
4678 net_stats
->tx_window_errors
= adapter
->stats
.latecol
;
4679 net_stats
->tx_carrier_errors
= adapter
->stats
.tncrs
;
4681 /* Tx Dropped needs to be maintained elsewhere */
4684 if (hw
->phy
.media_type
== e1000_media_type_copper
) {
4685 if ((adapter
->link_speed
== SPEED_1000
) &&
4686 (!igb_read_phy_reg(hw
, PHY_1000T_STATUS
, &phy_tmp
))) {
4687 phy_tmp
&= PHY_IDLE_ERROR_COUNT_MASK
;
4688 adapter
->phy_stats
.idle_errors
+= phy_tmp
;
4692 /* Management Stats */
4693 adapter
->stats
.mgptc
+= rd32(E1000_MGTPTC
);
4694 adapter
->stats
.mgprc
+= rd32(E1000_MGTPRC
);
4695 adapter
->stats
.mgpdc
+= rd32(E1000_MGTPDC
);
4698 reg
= rd32(E1000_MANC
);
4699 if (reg
& E1000_MANC_EN_BMC2OS
) {
4700 adapter
->stats
.o2bgptc
+= rd32(E1000_O2BGPTC
);
4701 adapter
->stats
.o2bspc
+= rd32(E1000_O2BSPC
);
4702 adapter
->stats
.b2ospc
+= rd32(E1000_B2OSPC
);
4703 adapter
->stats
.b2ogprc
+= rd32(E1000_B2OGPRC
);
4707 static irqreturn_t
igb_msix_other(int irq
, void *data
)
4709 struct igb_adapter
*adapter
= data
;
4710 struct e1000_hw
*hw
= &adapter
->hw
;
4711 u32 icr
= rd32(E1000_ICR
);
4712 /* reading ICR causes bit 31 of EICR to be cleared */
4714 if (icr
& E1000_ICR_DRSTA
)
4715 schedule_work(&adapter
->reset_task
);
4717 if (icr
& E1000_ICR_DOUTSYNC
) {
4718 /* HW is reporting DMA is out of sync */
4719 adapter
->stats
.doosync
++;
4720 /* The DMA Out of Sync is also indication of a spoof event
4721 * in IOV mode. Check the Wrong VM Behavior register to
4722 * see if it is really a spoof event. */
4723 igb_check_wvbr(adapter
);
4726 /* Check for a mailbox event */
4727 if (icr
& E1000_ICR_VMMB
)
4728 igb_msg_task(adapter
);
4730 if (icr
& E1000_ICR_LSC
) {
4731 hw
->mac
.get_link_status
= 1;
4732 /* guard against interrupt when we're going down */
4733 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
4734 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
4737 wr32(E1000_EIMS
, adapter
->eims_other
);
4742 static void igb_write_itr(struct igb_q_vector
*q_vector
)
4744 struct igb_adapter
*adapter
= q_vector
->adapter
;
4745 u32 itr_val
= q_vector
->itr_val
& 0x7FFC;
4747 if (!q_vector
->set_itr
)
4753 if (adapter
->hw
.mac
.type
== e1000_82575
)
4754 itr_val
|= itr_val
<< 16;
4756 itr_val
|= E1000_EITR_CNT_IGNR
;
4758 writel(itr_val
, q_vector
->itr_register
);
4759 q_vector
->set_itr
= 0;
4762 static irqreturn_t
igb_msix_ring(int irq
, void *data
)
4764 struct igb_q_vector
*q_vector
= data
;
4766 /* Write the ITR value calculated from the previous interrupt. */
4767 igb_write_itr(q_vector
);
4769 napi_schedule(&q_vector
->napi
);
4774 #ifdef CONFIG_IGB_DCA
4775 static void igb_update_dca(struct igb_q_vector
*q_vector
)
4777 struct igb_adapter
*adapter
= q_vector
->adapter
;
4778 struct e1000_hw
*hw
= &adapter
->hw
;
4779 int cpu
= get_cpu();
4781 if (q_vector
->cpu
== cpu
)
4784 if (q_vector
->tx
.ring
) {
4785 int q
= q_vector
->tx
.ring
->reg_idx
;
4786 u32 dca_txctrl
= rd32(E1000_DCA_TXCTRL(q
));
4787 if (hw
->mac
.type
== e1000_82575
) {
4788 dca_txctrl
&= ~E1000_DCA_TXCTRL_CPUID_MASK
;
4789 dca_txctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
);
4791 dca_txctrl
&= ~E1000_DCA_TXCTRL_CPUID_MASK_82576
;
4792 dca_txctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
) <<
4793 E1000_DCA_TXCTRL_CPUID_SHIFT
;
4795 dca_txctrl
|= E1000_DCA_TXCTRL_DESC_DCA_EN
;
4796 wr32(E1000_DCA_TXCTRL(q
), dca_txctrl
);
4798 if (q_vector
->rx
.ring
) {
4799 int q
= q_vector
->rx
.ring
->reg_idx
;
4800 u32 dca_rxctrl
= rd32(E1000_DCA_RXCTRL(q
));
4801 if (hw
->mac
.type
== e1000_82575
) {
4802 dca_rxctrl
&= ~E1000_DCA_RXCTRL_CPUID_MASK
;
4803 dca_rxctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
);
4805 dca_rxctrl
&= ~E1000_DCA_RXCTRL_CPUID_MASK_82576
;
4806 dca_rxctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
) <<
4807 E1000_DCA_RXCTRL_CPUID_SHIFT
;
4809 dca_rxctrl
|= E1000_DCA_RXCTRL_DESC_DCA_EN
;
4810 dca_rxctrl
|= E1000_DCA_RXCTRL_HEAD_DCA_EN
;
4811 dca_rxctrl
|= E1000_DCA_RXCTRL_DATA_DCA_EN
;
4812 wr32(E1000_DCA_RXCTRL(q
), dca_rxctrl
);
4814 q_vector
->cpu
= cpu
;
4819 static void igb_setup_dca(struct igb_adapter
*adapter
)
4821 struct e1000_hw
*hw
= &adapter
->hw
;
4824 if (!(adapter
->flags
& IGB_FLAG_DCA_ENABLED
))
4827 /* Always use CB2 mode, difference is masked in the CB driver. */
4828 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_CB2
);
4830 for (i
= 0; i
< adapter
->num_q_vectors
; i
++) {
4831 adapter
->q_vector
[i
]->cpu
= -1;
4832 igb_update_dca(adapter
->q_vector
[i
]);
4836 static int __igb_notify_dca(struct device
*dev
, void *data
)
4838 struct net_device
*netdev
= dev_get_drvdata(dev
);
4839 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4840 struct pci_dev
*pdev
= adapter
->pdev
;
4841 struct e1000_hw
*hw
= &adapter
->hw
;
4842 unsigned long event
= *(unsigned long *)data
;
4845 case DCA_PROVIDER_ADD
:
4846 /* if already enabled, don't do it again */
4847 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
)
4849 if (dca_add_requester(dev
) == 0) {
4850 adapter
->flags
|= IGB_FLAG_DCA_ENABLED
;
4851 dev_info(&pdev
->dev
, "DCA enabled\n");
4852 igb_setup_dca(adapter
);
4855 /* Fall Through since DCA is disabled. */
4856 case DCA_PROVIDER_REMOVE
:
4857 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
) {
4858 /* without this a class_device is left
4859 * hanging around in the sysfs model */
4860 dca_remove_requester(dev
);
4861 dev_info(&pdev
->dev
, "DCA disabled\n");
4862 adapter
->flags
&= ~IGB_FLAG_DCA_ENABLED
;
4863 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_DISABLE
);
4871 static int igb_notify_dca(struct notifier_block
*nb
, unsigned long event
,
4876 ret_val
= driver_for_each_device(&igb_driver
.driver
, NULL
, &event
,
4879 return ret_val
? NOTIFY_BAD
: NOTIFY_DONE
;
4881 #endif /* CONFIG_IGB_DCA */
4883 #ifdef CONFIG_PCI_IOV
4884 static int igb_vf_configure(struct igb_adapter
*adapter
, int vf
)
4886 unsigned char mac_addr
[ETH_ALEN
];
4887 struct pci_dev
*pdev
= adapter
->pdev
;
4888 struct e1000_hw
*hw
= &adapter
->hw
;
4889 struct pci_dev
*pvfdev
;
4890 unsigned int device_id
;
4893 random_ether_addr(mac_addr
);
4894 igb_set_vf_mac(adapter
, vf
, mac_addr
);
4896 switch (adapter
->hw
.mac
.type
) {
4898 device_id
= IGB_82576_VF_DEV_ID
;
4899 /* VF Stride for 82576 is 2 */
4900 thisvf_devfn
= (pdev
->devfn
+ 0x80 + (vf
<< 1)) |
4904 device_id
= IGB_I350_VF_DEV_ID
;
4905 /* VF Stride for I350 is 4 */
4906 thisvf_devfn
= (pdev
->devfn
+ 0x80 + (vf
<< 2)) |
4915 pvfdev
= pci_get_device(hw
->vendor_id
, device_id
, NULL
);
4917 if (pvfdev
->devfn
== thisvf_devfn
)
4919 pvfdev
= pci_get_device(hw
->vendor_id
,
4924 adapter
->vf_data
[vf
].vfdev
= pvfdev
;
4927 "Couldn't find pci dev ptr for VF %4.4x\n",
4929 return pvfdev
!= NULL
;
4932 static int igb_find_enabled_vfs(struct igb_adapter
*adapter
)
4934 struct e1000_hw
*hw
= &adapter
->hw
;
4935 struct pci_dev
*pdev
= adapter
->pdev
;
4936 struct pci_dev
*pvfdev
;
4939 unsigned int device_id
;
4942 switch (adapter
->hw
.mac
.type
) {
4944 device_id
= IGB_82576_VF_DEV_ID
;
4945 /* VF Stride for 82576 is 2 */
4949 device_id
= IGB_I350_VF_DEV_ID
;
4950 /* VF Stride for I350 is 4 */
4959 vf_devfn
= pdev
->devfn
+ 0x80;
4960 pvfdev
= pci_get_device(hw
->vendor_id
, device_id
, NULL
);
4962 if (pvfdev
->devfn
== vf_devfn
)
4964 vf_devfn
+= vf_stride
;
4965 pvfdev
= pci_get_device(hw
->vendor_id
,
4972 static int igb_check_vf_assignment(struct igb_adapter
*adapter
)
4975 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
4976 if (adapter
->vf_data
[i
].vfdev
) {
4977 if (adapter
->vf_data
[i
].vfdev
->dev_flags
&
4978 PCI_DEV_FLAGS_ASSIGNED
)
4986 static void igb_ping_all_vfs(struct igb_adapter
*adapter
)
4988 struct e1000_hw
*hw
= &adapter
->hw
;
4992 for (i
= 0 ; i
< adapter
->vfs_allocated_count
; i
++) {
4993 ping
= E1000_PF_CONTROL_MSG
;
4994 if (adapter
->vf_data
[i
].flags
& IGB_VF_FLAG_CTS
)
4995 ping
|= E1000_VT_MSGTYPE_CTS
;
4996 igb_write_mbx(hw
, &ping
, 1, i
);
5000 static int igb_set_vf_promisc(struct igb_adapter
*adapter
, u32
*msgbuf
, u32 vf
)
5002 struct e1000_hw
*hw
= &adapter
->hw
;
5003 u32 vmolr
= rd32(E1000_VMOLR(vf
));
5004 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5006 vf_data
->flags
&= ~(IGB_VF_FLAG_UNI_PROMISC
|
5007 IGB_VF_FLAG_MULTI_PROMISC
);
5008 vmolr
&= ~(E1000_VMOLR_ROPE
| E1000_VMOLR_ROMPE
| E1000_VMOLR_MPME
);
5010 if (*msgbuf
& E1000_VF_SET_PROMISC_MULTICAST
) {
5011 vmolr
|= E1000_VMOLR_MPME
;
5012 vf_data
->flags
|= IGB_VF_FLAG_MULTI_PROMISC
;
5013 *msgbuf
&= ~E1000_VF_SET_PROMISC_MULTICAST
;
5016 * if we have hashes and we are clearing a multicast promisc
5017 * flag we need to write the hashes to the MTA as this step
5018 * was previously skipped
5020 if (vf_data
->num_vf_mc_hashes
> 30) {
5021 vmolr
|= E1000_VMOLR_MPME
;
5022 } else if (vf_data
->num_vf_mc_hashes
) {
5024 vmolr
|= E1000_VMOLR_ROMPE
;
5025 for (j
= 0; j
< vf_data
->num_vf_mc_hashes
; j
++)
5026 igb_mta_set(hw
, vf_data
->vf_mc_hashes
[j
]);
5030 wr32(E1000_VMOLR(vf
), vmolr
);
5032 /* there are flags left unprocessed, likely not supported */
5033 if (*msgbuf
& E1000_VT_MSGINFO_MASK
)
5040 static int igb_set_vf_multicasts(struct igb_adapter
*adapter
,
5041 u32
*msgbuf
, u32 vf
)
5043 int n
= (msgbuf
[0] & E1000_VT_MSGINFO_MASK
) >> E1000_VT_MSGINFO_SHIFT
;
5044 u16
*hash_list
= (u16
*)&msgbuf
[1];
5045 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5048 /* salt away the number of multicast addresses assigned
5049 * to this VF for later use to restore when the PF multi cast
5052 vf_data
->num_vf_mc_hashes
= n
;
5054 /* only up to 30 hash values supported */
5058 /* store the hashes for later use */
5059 for (i
= 0; i
< n
; i
++)
5060 vf_data
->vf_mc_hashes
[i
] = hash_list
[i
];
5062 /* Flush and reset the mta with the new values */
5063 igb_set_rx_mode(adapter
->netdev
);
5068 static void igb_restore_vf_multicasts(struct igb_adapter
*adapter
)
5070 struct e1000_hw
*hw
= &adapter
->hw
;
5071 struct vf_data_storage
*vf_data
;
5074 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
5075 u32 vmolr
= rd32(E1000_VMOLR(i
));
5076 vmolr
&= ~(E1000_VMOLR_ROMPE
| E1000_VMOLR_MPME
);
5078 vf_data
= &adapter
->vf_data
[i
];
5080 if ((vf_data
->num_vf_mc_hashes
> 30) ||
5081 (vf_data
->flags
& IGB_VF_FLAG_MULTI_PROMISC
)) {
5082 vmolr
|= E1000_VMOLR_MPME
;
5083 } else if (vf_data
->num_vf_mc_hashes
) {
5084 vmolr
|= E1000_VMOLR_ROMPE
;
5085 for (j
= 0; j
< vf_data
->num_vf_mc_hashes
; j
++)
5086 igb_mta_set(hw
, vf_data
->vf_mc_hashes
[j
]);
5088 wr32(E1000_VMOLR(i
), vmolr
);
5092 static void igb_clear_vf_vfta(struct igb_adapter
*adapter
, u32 vf
)
5094 struct e1000_hw
*hw
= &adapter
->hw
;
5095 u32 pool_mask
, reg
, vid
;
5098 pool_mask
= 1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
);
5100 /* Find the vlan filter for this id */
5101 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5102 reg
= rd32(E1000_VLVF(i
));
5104 /* remove the vf from the pool */
5107 /* if pool is empty then remove entry from vfta */
5108 if (!(reg
& E1000_VLVF_POOLSEL_MASK
) &&
5109 (reg
& E1000_VLVF_VLANID_ENABLE
)) {
5111 vid
= reg
& E1000_VLVF_VLANID_MASK
;
5112 igb_vfta_set(hw
, vid
, false);
5115 wr32(E1000_VLVF(i
), reg
);
5118 adapter
->vf_data
[vf
].vlans_enabled
= 0;
5121 static s32
igb_vlvf_set(struct igb_adapter
*adapter
, u32 vid
, bool add
, u32 vf
)
5123 struct e1000_hw
*hw
= &adapter
->hw
;
5126 /* The vlvf table only exists on 82576 hardware and newer */
5127 if (hw
->mac
.type
< e1000_82576
)
5130 /* we only need to do this if VMDq is enabled */
5131 if (!adapter
->vfs_allocated_count
)
5134 /* Find the vlan filter for this id */
5135 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5136 reg
= rd32(E1000_VLVF(i
));
5137 if ((reg
& E1000_VLVF_VLANID_ENABLE
) &&
5138 vid
== (reg
& E1000_VLVF_VLANID_MASK
))
5143 if (i
== E1000_VLVF_ARRAY_SIZE
) {
5144 /* Did not find a matching VLAN ID entry that was
5145 * enabled. Search for a free filter entry, i.e.
5146 * one without the enable bit set
5148 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5149 reg
= rd32(E1000_VLVF(i
));
5150 if (!(reg
& E1000_VLVF_VLANID_ENABLE
))
5154 if (i
< E1000_VLVF_ARRAY_SIZE
) {
5155 /* Found an enabled/available entry */
5156 reg
|= 1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
);
5158 /* if !enabled we need to set this up in vfta */
5159 if (!(reg
& E1000_VLVF_VLANID_ENABLE
)) {
5160 /* add VID to filter table */
5161 igb_vfta_set(hw
, vid
, true);
5162 reg
|= E1000_VLVF_VLANID_ENABLE
;
5164 reg
&= ~E1000_VLVF_VLANID_MASK
;
5166 wr32(E1000_VLVF(i
), reg
);
5168 /* do not modify RLPML for PF devices */
5169 if (vf
>= adapter
->vfs_allocated_count
)
5172 if (!adapter
->vf_data
[vf
].vlans_enabled
) {
5174 reg
= rd32(E1000_VMOLR(vf
));
5175 size
= reg
& E1000_VMOLR_RLPML_MASK
;
5177 reg
&= ~E1000_VMOLR_RLPML_MASK
;
5179 wr32(E1000_VMOLR(vf
), reg
);
5182 adapter
->vf_data
[vf
].vlans_enabled
++;
5185 if (i
< E1000_VLVF_ARRAY_SIZE
) {
5186 /* remove vf from the pool */
5187 reg
&= ~(1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
));
5188 /* if pool is empty then remove entry from vfta */
5189 if (!(reg
& E1000_VLVF_POOLSEL_MASK
)) {
5191 igb_vfta_set(hw
, vid
, false);
5193 wr32(E1000_VLVF(i
), reg
);
5195 /* do not modify RLPML for PF devices */
5196 if (vf
>= adapter
->vfs_allocated_count
)
5199 adapter
->vf_data
[vf
].vlans_enabled
--;
5200 if (!adapter
->vf_data
[vf
].vlans_enabled
) {
5202 reg
= rd32(E1000_VMOLR(vf
));
5203 size
= reg
& E1000_VMOLR_RLPML_MASK
;
5205 reg
&= ~E1000_VMOLR_RLPML_MASK
;
5207 wr32(E1000_VMOLR(vf
), reg
);
5214 static void igb_set_vmvir(struct igb_adapter
*adapter
, u32 vid
, u32 vf
)
5216 struct e1000_hw
*hw
= &adapter
->hw
;
5219 wr32(E1000_VMVIR(vf
), (vid
| E1000_VMVIR_VLANA_DEFAULT
));
5221 wr32(E1000_VMVIR(vf
), 0);
5224 static int igb_ndo_set_vf_vlan(struct net_device
*netdev
,
5225 int vf
, u16 vlan
, u8 qos
)
5228 struct igb_adapter
*adapter
= netdev_priv(netdev
);
5230 if ((vf
>= adapter
->vfs_allocated_count
) || (vlan
> 4095) || (qos
> 7))
5233 err
= igb_vlvf_set(adapter
, vlan
, !!vlan
, vf
);
5236 igb_set_vmvir(adapter
, vlan
| (qos
<< VLAN_PRIO_SHIFT
), vf
);
5237 igb_set_vmolr(adapter
, vf
, !vlan
);
5238 adapter
->vf_data
[vf
].pf_vlan
= vlan
;
5239 adapter
->vf_data
[vf
].pf_qos
= qos
;
5240 dev_info(&adapter
->pdev
->dev
,
5241 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan
, qos
, vf
);
5242 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
5243 dev_warn(&adapter
->pdev
->dev
,
5244 "The VF VLAN has been set,"
5245 " but the PF device is not up.\n");
5246 dev_warn(&adapter
->pdev
->dev
,
5247 "Bring the PF device up before"
5248 " attempting to use the VF device.\n");
5251 igb_vlvf_set(adapter
, adapter
->vf_data
[vf
].pf_vlan
,
5253 igb_set_vmvir(adapter
, vlan
, vf
);
5254 igb_set_vmolr(adapter
, vf
, true);
5255 adapter
->vf_data
[vf
].pf_vlan
= 0;
5256 adapter
->vf_data
[vf
].pf_qos
= 0;
5262 static int igb_set_vf_vlan(struct igb_adapter
*adapter
, u32
*msgbuf
, u32 vf
)
5264 int add
= (msgbuf
[0] & E1000_VT_MSGINFO_MASK
) >> E1000_VT_MSGINFO_SHIFT
;
5265 int vid
= (msgbuf
[1] & E1000_VLVF_VLANID_MASK
);
5267 return igb_vlvf_set(adapter
, vid
, add
, vf
);
5270 static inline void igb_vf_reset(struct igb_adapter
*adapter
, u32 vf
)
5272 /* clear flags - except flag that indicates PF has set the MAC */
5273 adapter
->vf_data
[vf
].flags
&= IGB_VF_FLAG_PF_SET_MAC
;
5274 adapter
->vf_data
[vf
].last_nack
= jiffies
;
5276 /* reset offloads to defaults */
5277 igb_set_vmolr(adapter
, vf
, true);
5279 /* reset vlans for device */
5280 igb_clear_vf_vfta(adapter
, vf
);
5281 if (adapter
->vf_data
[vf
].pf_vlan
)
5282 igb_ndo_set_vf_vlan(adapter
->netdev
, vf
,
5283 adapter
->vf_data
[vf
].pf_vlan
,
5284 adapter
->vf_data
[vf
].pf_qos
);
5286 igb_clear_vf_vfta(adapter
, vf
);
5288 /* reset multicast table array for vf */
5289 adapter
->vf_data
[vf
].num_vf_mc_hashes
= 0;
5291 /* Flush and reset the mta with the new values */
5292 igb_set_rx_mode(adapter
->netdev
);
5295 static void igb_vf_reset_event(struct igb_adapter
*adapter
, u32 vf
)
5297 unsigned char *vf_mac
= adapter
->vf_data
[vf
].vf_mac_addresses
;
5299 /* generate a new mac address as we were hotplug removed/added */
5300 if (!(adapter
->vf_data
[vf
].flags
& IGB_VF_FLAG_PF_SET_MAC
))
5301 random_ether_addr(vf_mac
);
5303 /* process remaining reset events */
5304 igb_vf_reset(adapter
, vf
);
5307 static void igb_vf_reset_msg(struct igb_adapter
*adapter
, u32 vf
)
5309 struct e1000_hw
*hw
= &adapter
->hw
;
5310 unsigned char *vf_mac
= adapter
->vf_data
[vf
].vf_mac_addresses
;
5311 int rar_entry
= hw
->mac
.rar_entry_count
- (vf
+ 1);
5313 u8
*addr
= (u8
*)(&msgbuf
[1]);
5315 /* process all the same items cleared in a function level reset */
5316 igb_vf_reset(adapter
, vf
);
5318 /* set vf mac address */
5319 igb_rar_set_qsel(adapter
, vf_mac
, rar_entry
, vf
);
5321 /* enable transmit and receive for vf */
5322 reg
= rd32(E1000_VFTE
);
5323 wr32(E1000_VFTE
, reg
| (1 << vf
));
5324 reg
= rd32(E1000_VFRE
);
5325 wr32(E1000_VFRE
, reg
| (1 << vf
));
5327 adapter
->vf_data
[vf
].flags
|= IGB_VF_FLAG_CTS
;
5329 /* reply to reset with ack and vf mac address */
5330 msgbuf
[0] = E1000_VF_RESET
| E1000_VT_MSGTYPE_ACK
;
5331 memcpy(addr
, vf_mac
, 6);
5332 igb_write_mbx(hw
, msgbuf
, 3, vf
);
5335 static int igb_set_vf_mac_addr(struct igb_adapter
*adapter
, u32
*msg
, int vf
)
5338 * The VF MAC Address is stored in a packed array of bytes
5339 * starting at the second 32 bit word of the msg array
5341 unsigned char *addr
= (char *)&msg
[1];
5344 if (is_valid_ether_addr(addr
))
5345 err
= igb_set_vf_mac(adapter
, vf
, addr
);
5350 static void igb_rcv_ack_from_vf(struct igb_adapter
*adapter
, u32 vf
)
5352 struct e1000_hw
*hw
= &adapter
->hw
;
5353 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5354 u32 msg
= E1000_VT_MSGTYPE_NACK
;
5356 /* if device isn't clear to send it shouldn't be reading either */
5357 if (!(vf_data
->flags
& IGB_VF_FLAG_CTS
) &&
5358 time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
))) {
5359 igb_write_mbx(hw
, &msg
, 1, vf
);
5360 vf_data
->last_nack
= jiffies
;
5364 static void igb_rcv_msg_from_vf(struct igb_adapter
*adapter
, u32 vf
)
5366 struct pci_dev
*pdev
= adapter
->pdev
;
5367 u32 msgbuf
[E1000_VFMAILBOX_SIZE
];
5368 struct e1000_hw
*hw
= &adapter
->hw
;
5369 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5372 retval
= igb_read_mbx(hw
, msgbuf
, E1000_VFMAILBOX_SIZE
, vf
);
5375 /* if receive failed revoke VF CTS stats and restart init */
5376 dev_err(&pdev
->dev
, "Error receiving message from VF\n");
5377 vf_data
->flags
&= ~IGB_VF_FLAG_CTS
;
5378 if (!time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
)))
5383 /* this is a message we already processed, do nothing */
5384 if (msgbuf
[0] & (E1000_VT_MSGTYPE_ACK
| E1000_VT_MSGTYPE_NACK
))
5388 * until the vf completes a reset it should not be
5389 * allowed to start any configuration.
5392 if (msgbuf
[0] == E1000_VF_RESET
) {
5393 igb_vf_reset_msg(adapter
, vf
);
5397 if (!(vf_data
->flags
& IGB_VF_FLAG_CTS
)) {
5398 if (!time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
)))
5404 switch ((msgbuf
[0] & 0xFFFF)) {
5405 case E1000_VF_SET_MAC_ADDR
:
5407 if (!(vf_data
->flags
& IGB_VF_FLAG_PF_SET_MAC
))
5408 retval
= igb_set_vf_mac_addr(adapter
, msgbuf
, vf
);
5410 dev_warn(&pdev
->dev
,
5411 "VF %d attempted to override administratively "
5412 "set MAC address\nReload the VF driver to "
5413 "resume operations\n", vf
);
5415 case E1000_VF_SET_PROMISC
:
5416 retval
= igb_set_vf_promisc(adapter
, msgbuf
, vf
);
5418 case E1000_VF_SET_MULTICAST
:
5419 retval
= igb_set_vf_multicasts(adapter
, msgbuf
, vf
);
5421 case E1000_VF_SET_LPE
:
5422 retval
= igb_set_vf_rlpml(adapter
, msgbuf
[1], vf
);
5424 case E1000_VF_SET_VLAN
:
5426 if (vf_data
->pf_vlan
)
5427 dev_warn(&pdev
->dev
,
5428 "VF %d attempted to override administratively "
5429 "set VLAN tag\nReload the VF driver to "
5430 "resume operations\n", vf
);
5432 retval
= igb_set_vf_vlan(adapter
, msgbuf
, vf
);
5435 dev_err(&pdev
->dev
, "Unhandled Msg %08x\n", msgbuf
[0]);
5440 msgbuf
[0] |= E1000_VT_MSGTYPE_CTS
;
5442 /* notify the VF of the results of what it sent us */
5444 msgbuf
[0] |= E1000_VT_MSGTYPE_NACK
;
5446 msgbuf
[0] |= E1000_VT_MSGTYPE_ACK
;
5448 igb_write_mbx(hw
, msgbuf
, 1, vf
);
5451 static void igb_msg_task(struct igb_adapter
*adapter
)
5453 struct e1000_hw
*hw
= &adapter
->hw
;
5456 for (vf
= 0; vf
< adapter
->vfs_allocated_count
; vf
++) {
5457 /* process any reset requests */
5458 if (!igb_check_for_rst(hw
, vf
))
5459 igb_vf_reset_event(adapter
, vf
);
5461 /* process any messages pending */
5462 if (!igb_check_for_msg(hw
, vf
))
5463 igb_rcv_msg_from_vf(adapter
, vf
);
5465 /* process any acks */
5466 if (!igb_check_for_ack(hw
, vf
))
5467 igb_rcv_ack_from_vf(adapter
, vf
);
5472 * igb_set_uta - Set unicast filter table address
5473 * @adapter: board private structure
5475 * The unicast table address is a register array of 32-bit registers.
5476 * The table is meant to be used in a way similar to how the MTA is used
5477 * however due to certain limitations in the hardware it is necessary to
5478 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5479 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5481 static void igb_set_uta(struct igb_adapter
*adapter
)
5483 struct e1000_hw
*hw
= &adapter
->hw
;
5486 /* The UTA table only exists on 82576 hardware and newer */
5487 if (hw
->mac
.type
< e1000_82576
)
5490 /* we only need to do this if VMDq is enabled */
5491 if (!adapter
->vfs_allocated_count
)
5494 for (i
= 0; i
< hw
->mac
.uta_reg_count
; i
++)
5495 array_wr32(E1000_UTA
, i
, ~0);
5499 * igb_intr_msi - Interrupt Handler
5500 * @irq: interrupt number
5501 * @data: pointer to a network interface device structure
5503 static irqreturn_t
igb_intr_msi(int irq
, void *data
)
5505 struct igb_adapter
*adapter
= data
;
5506 struct igb_q_vector
*q_vector
= adapter
->q_vector
[0];
5507 struct e1000_hw
*hw
= &adapter
->hw
;
5508 /* read ICR disables interrupts using IAM */
5509 u32 icr
= rd32(E1000_ICR
);
5511 igb_write_itr(q_vector
);
5513 if (icr
& E1000_ICR_DRSTA
)
5514 schedule_work(&adapter
->reset_task
);
5516 if (icr
& E1000_ICR_DOUTSYNC
) {
5517 /* HW is reporting DMA is out of sync */
5518 adapter
->stats
.doosync
++;
5521 if (icr
& (E1000_ICR_RXSEQ
| E1000_ICR_LSC
)) {
5522 hw
->mac
.get_link_status
= 1;
5523 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
5524 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
5527 napi_schedule(&q_vector
->napi
);
5533 * igb_intr - Legacy Interrupt Handler
5534 * @irq: interrupt number
5535 * @data: pointer to a network interface device structure
5537 static irqreturn_t
igb_intr(int irq
, void *data
)
5539 struct igb_adapter
*adapter
= data
;
5540 struct igb_q_vector
*q_vector
= adapter
->q_vector
[0];
5541 struct e1000_hw
*hw
= &adapter
->hw
;
5542 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5543 * need for the IMC write */
5544 u32 icr
= rd32(E1000_ICR
);
5546 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5547 * not set, then the adapter didn't send an interrupt */
5548 if (!(icr
& E1000_ICR_INT_ASSERTED
))
5551 igb_write_itr(q_vector
);
5553 if (icr
& E1000_ICR_DRSTA
)
5554 schedule_work(&adapter
->reset_task
);
5556 if (icr
& E1000_ICR_DOUTSYNC
) {
5557 /* HW is reporting DMA is out of sync */
5558 adapter
->stats
.doosync
++;
5561 if (icr
& (E1000_ICR_RXSEQ
| E1000_ICR_LSC
)) {
5562 hw
->mac
.get_link_status
= 1;
5563 /* guard against interrupt when we're going down */
5564 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
5565 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
5568 napi_schedule(&q_vector
->napi
);
5573 void igb_ring_irq_enable(struct igb_q_vector
*q_vector
)
5575 struct igb_adapter
*adapter
= q_vector
->adapter
;
5576 struct e1000_hw
*hw
= &adapter
->hw
;
5578 if ((q_vector
->rx
.ring
&& (adapter
->rx_itr_setting
& 3)) ||
5579 (!q_vector
->rx
.ring
&& (adapter
->tx_itr_setting
& 3))) {
5580 if ((adapter
->num_q_vectors
== 1) && !adapter
->vf_data
)
5581 igb_set_itr(q_vector
);
5583 igb_update_ring_itr(q_vector
);
5586 if (!test_bit(__IGB_DOWN
, &adapter
->state
)) {
5587 if (adapter
->msix_entries
)
5588 wr32(E1000_EIMS
, q_vector
->eims_value
);
5590 igb_irq_enable(adapter
);
5595 * igb_poll - NAPI Rx polling callback
5596 * @napi: napi polling structure
5597 * @budget: count of how many packets we should handle
5599 static int igb_poll(struct napi_struct
*napi
, int budget
)
5601 struct igb_q_vector
*q_vector
= container_of(napi
,
5602 struct igb_q_vector
,
5604 bool clean_complete
= true;
5606 #ifdef CONFIG_IGB_DCA
5607 if (q_vector
->adapter
->flags
& IGB_FLAG_DCA_ENABLED
)
5608 igb_update_dca(q_vector
);
5610 if (q_vector
->tx
.ring
)
5611 clean_complete
= igb_clean_tx_irq(q_vector
);
5613 if (q_vector
->rx
.ring
)
5614 clean_complete
&= igb_clean_rx_irq(q_vector
, budget
);
5616 /* If all work not completed, return budget and keep polling */
5617 if (!clean_complete
)
5620 /* If not enough Rx work done, exit the polling mode */
5621 napi_complete(napi
);
5622 igb_ring_irq_enable(q_vector
);
5628 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5629 * @adapter: board private structure
5630 * @shhwtstamps: timestamp structure to update
5631 * @regval: unsigned 64bit system time value.
5633 * We need to convert the system time value stored in the RX/TXSTMP registers
5634 * into a hwtstamp which can be used by the upper level timestamping functions
5636 static void igb_systim_to_hwtstamp(struct igb_adapter
*adapter
,
5637 struct skb_shared_hwtstamps
*shhwtstamps
,
5643 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5644 * 24 to match clock shift we setup earlier.
5646 if (adapter
->hw
.mac
.type
>= e1000_82580
)
5647 regval
<<= IGB_82580_TSYNC_SHIFT
;
5649 ns
= timecounter_cyc2time(&adapter
->clock
, regval
);
5650 timecompare_update(&adapter
->compare
, ns
);
5651 memset(shhwtstamps
, 0, sizeof(struct skb_shared_hwtstamps
));
5652 shhwtstamps
->hwtstamp
= ns_to_ktime(ns
);
5653 shhwtstamps
->syststamp
= timecompare_transform(&adapter
->compare
, ns
);
5657 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5658 * @q_vector: pointer to q_vector containing needed info
5659 * @buffer: pointer to igb_tx_buffer structure
5661 * If we were asked to do hardware stamping and such a time stamp is
5662 * available, then it must have been for this skb here because we only
5663 * allow only one such packet into the queue.
5665 static void igb_tx_hwtstamp(struct igb_q_vector
*q_vector
,
5666 struct igb_tx_buffer
*buffer_info
)
5668 struct igb_adapter
*adapter
= q_vector
->adapter
;
5669 struct e1000_hw
*hw
= &adapter
->hw
;
5670 struct skb_shared_hwtstamps shhwtstamps
;
5673 /* if skb does not support hw timestamp or TX stamp not valid exit */
5674 if (likely(!(buffer_info
->tx_flags
& IGB_TX_FLAGS_TSTAMP
)) ||
5675 !(rd32(E1000_TSYNCTXCTL
) & E1000_TSYNCTXCTL_VALID
))
5678 regval
= rd32(E1000_TXSTMPL
);
5679 regval
|= (u64
)rd32(E1000_TXSTMPH
) << 32;
5681 igb_systim_to_hwtstamp(adapter
, &shhwtstamps
, regval
);
5682 skb_tstamp_tx(buffer_info
->skb
, &shhwtstamps
);
5686 * igb_clean_tx_irq - Reclaim resources after transmit completes
5687 * @q_vector: pointer to q_vector containing needed info
5688 * returns true if ring is completely cleaned
/*
 * igb_clean_tx_irq: reclaim Tx buffers whose descriptors report DD.
 *
 * Starting at tx_ring->next_to_clean, processes at most
 * q_vector->tx.work_limit packets: frees each skb, unmaps its DMA
 * buffers, and accumulates byte/packet totals into the ring and vector
 * statistics.  When IGB_RING_FLAG_TX_DETECT_HANG is set it also runs the
 * Tx hang check, and it wakes a stopped subqueue once at least
 * IGB_TX_QUEUE_WAKE descriptors are unused.
 *
 * NOTE(review): several statements of this function (returns, loop
 * exits, index/pointer increments, some call arguments) are absent from
 * this copy of the file; confirm against a complete source before
 * changing any logic here.
 */
5690 static bool igb_clean_tx_irq(struct igb_q_vector
*q_vector
)
5692 struct igb_adapter
*adapter
= q_vector
->adapter
;
5693 struct igb_ring
*tx_ring
= q_vector
->tx
.ring
;
5694 struct igb_tx_buffer
*tx_buffer
;
5695 union e1000_adv_tx_desc
*tx_desc
, *eop_desc
;
5696 unsigned int total_bytes
= 0, total_packets
= 0;
5697 unsigned int budget
= q_vector
->tx
.work_limit
;
5698 unsigned int i
= tx_ring
->next_to_clean
;
5700 if (test_bit(__IGB_DOWN
, &adapter
->state
))
5703 tx_buffer
= &tx_ring
->tx_buffer_info
[i
];
5704 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
/*
 * i is biased downward by the ring size here and restored with
 * "i += tx_ring->count" after the loop; presumably so that wrap-around
 * shows up as i reaching zero. TODO confirm against the missing lines.
 */
5705 i
-= tx_ring
->count
;
5707 for (; budget
; budget
--) {
5708 eop_desc
= tx_buffer
->next_to_watch
;
5710 /* prevent any other reads prior to eop_desc */
5713 /* if next_to_watch is not set then there is no work pending */
5717 /* if DD is not set pending work has not been completed */
5718 if (!(eop_desc
->wb
.status
& cpu_to_le32(E1000_TXD_STAT_DD
)))
5721 /* clear next_to_watch to prevent false hangs */
5722 tx_buffer
->next_to_watch
= NULL
;
5724 /* update the statistics for this packet */
5725 total_bytes
+= tx_buffer
->bytecount
;
5726 total_packets
+= tx_buffer
->gso_segs
;
5728 /* retrieve hardware timestamp */
5729 igb_tx_hwtstamp(q_vector
, tx_buffer
);
5732 dev_kfree_skb_any(tx_buffer
->skb
);
5733 tx_buffer
->skb
= NULL
;
5735 /* unmap skb header data */
5736 dma_unmap_single(tx_ring
->dev
,
5741 /* clear last DMA location and unmap remaining buffers */
5742 while (tx_desc
!= eop_desc
) {
5749 i
-= tx_ring
->count
;
5750 tx_buffer
= tx_ring
->tx_buffer_info
;
5751 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
5754 /* unmap any remaining paged data */
5755 if (tx_buffer
->dma
) {
5756 dma_unmap_page(tx_ring
->dev
,
5763 /* clear last DMA location */
5766 /* move us one more past the eop_desc for start of next pkt */
5771 i
-= tx_ring
->count
;
5772 tx_buffer
= tx_ring
->tx_buffer_info
;
5773 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
/* undo the bias applied before the loop and publish the new clean index */
5777 i
+= tx_ring
->count
;
5778 tx_ring
->next_to_clean
= i
;
5779 u64_stats_update_begin(&tx_ring
->tx_syncp
);
5780 tx_ring
->tx_stats
.bytes
+= total_bytes
;
5781 tx_ring
->tx_stats
.packets
+= total_packets
;
5782 u64_stats_update_end(&tx_ring
->tx_syncp
);
5783 q_vector
->tx
.total_bytes
+= total_bytes
;
5784 q_vector
->tx
.total_packets
+= total_packets
;
5786 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
)) {
5787 struct e1000_hw
*hw
= &adapter
->hw
;
5789 eop_desc
= tx_buffer
->next_to_watch
;
5791 /* Detect a transmit hang in hardware, this serializes the
5792 * check with the clearing of time_stamp and movement of i */
5793 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
);
5795 time_after(jiffies
, tx_buffer
->time_stamp
+
5796 (adapter
->tx_timeout_factor
* HZ
)) &&
5797 !(rd32(E1000_STATUS
) & E1000_STATUS_TXOFF
)) {
5799 /* detected Tx unit hang */
5800 dev_err(tx_ring
->dev
,
5801 "Detected Tx Unit Hang\n"
5805 " next_to_use <%x>\n"
5806 " next_to_clean <%x>\n"
5807 "buffer_info[next_to_clean]\n"
5808 " time_stamp <%lx>\n"
5809 " next_to_watch <%p>\n"
5811 " desc.status <%x>\n",
5812 tx_ring
->queue_index
,
5813 rd32(E1000_TDH(tx_ring
->reg_idx
)),
5814 readl(tx_ring
->tail
),
5815 tx_ring
->next_to_use
,
5816 tx_ring
->next_to_clean
,
5817 tx_buffer
->time_stamp
,
5820 eop_desc
->wb
.status
);
5821 netif_stop_subqueue(tx_ring
->netdev
,
5822 tx_ring
->queue_index
);
5824 /* we are about to reset, no point in enabling stuff */
/*
 * Wake the subqueue only if this pass cleaned at least one packet, the
 * carrier is up, and enough descriptors are free again.
 */
5829 if (unlikely(total_packets
&&
5830 netif_carrier_ok(tx_ring
->netdev
) &&
5831 igb_desc_unused(tx_ring
) >= IGB_TX_QUEUE_WAKE
)) {
5832 /* Make sure that anybody stopping the queue after this
5833 * sees the new next_to_clean.
5836 if (__netif_subqueue_stopped(tx_ring
->netdev
,
5837 tx_ring
->queue_index
) &&
5838 !(test_bit(__IGB_DOWN
, &adapter
->state
))) {
5839 netif_wake_subqueue(tx_ring
->netdev
,
5840 tx_ring
->queue_index
);
5842 u64_stats_update_begin(&tx_ring
->tx_syncp
);
5843 tx_ring
->tx_stats
.restart_queue
++;
5844 u64_stats_update_end(&tx_ring
->tx_syncp
);
5851 static inline void igb_rx_checksum(struct igb_ring
*ring
,
5852 union e1000_adv_rx_desc
*rx_desc
,
5853 struct sk_buff
*skb
)
5855 skb_checksum_none_assert(skb
);
5857 /* Ignore Checksum bit is set */
5858 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_IXSM
))
5861 /* Rx checksum disabled via ethtool */
5862 if (!(ring
->netdev
->features
& NETIF_F_RXCSUM
))
5865 /* TCP/UDP checksum error bit is set */
5866 if (igb_test_staterr(rx_desc
,
5867 E1000_RXDEXT_STATERR_TCPE
|
5868 E1000_RXDEXT_STATERR_IPE
)) {
5870 * work around errata with sctp packets where the TCPE aka
5871 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5872 * packets, (aka let the stack check the crc32c)
5874 if (!((skb
->len
== 60) &&
5875 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM
, &ring
->flags
))) {
5876 u64_stats_update_begin(&ring
->rx_syncp
);
5877 ring
->rx_stats
.csum_err
++;
5878 u64_stats_update_end(&ring
->rx_syncp
);
5880 /* let the stack verify checksum errors */
5883 /* It must be a TCP or UDP packet with a valid checksum */
5884 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_TCPCS
|
5885 E1000_RXD_STAT_UDPCS
))
5886 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
5888 dev_dbg(ring
->dev
, "cksum success: bits %08X\n",
5889 le32_to_cpu(rx_desc
->wb
.upper
.status_error
));
5892 static inline void igb_rx_hash(struct igb_ring
*ring
,
5893 union e1000_adv_rx_desc
*rx_desc
,
5894 struct sk_buff
*skb
)
5896 if (ring
->netdev
->features
& NETIF_F_RXHASH
)
5897 skb
->rxhash
= le32_to_cpu(rx_desc
->wb
.lower
.hi_dword
.rss
);
5900 static void igb_rx_hwtstamp(struct igb_q_vector
*q_vector
,
5901 union e1000_adv_rx_desc
*rx_desc
,
5902 struct sk_buff
*skb
)
5904 struct igb_adapter
*adapter
= q_vector
->adapter
;
5905 struct e1000_hw
*hw
= &adapter
->hw
;
5908 if (!igb_test_staterr(rx_desc
, E1000_RXDADV_STAT_TSIP
|
5909 E1000_RXDADV_STAT_TS
))
5913 * If this bit is set, then the RX registers contain the time stamp. No
5914 * other packet will be time stamped until we read these registers, so
5915 * read the registers to make them available again. Because only one
5916 * packet can be time stamped at a time, we know that the register
5917 * values must belong to this one here and therefore we don't need to
5918 * compare any of the additional attributes stored for it.
5920 * If nothing went wrong, then it should have a shared tx_flags that we
5921 * can turn into a skb_shared_hwtstamps.
5923 if (igb_test_staterr(rx_desc
, E1000_RXDADV_STAT_TSIP
)) {
5924 u32
*stamp
= (u32
*)skb
->data
;
5925 regval
= le32_to_cpu(*(stamp
+ 2));
5926 regval
|= (u64
)le32_to_cpu(*(stamp
+ 3)) << 32;
5927 skb_pull(skb
, IGB_TS_HDR_LEN
);
5929 if(!(rd32(E1000_TSYNCRXCTL
) & E1000_TSYNCRXCTL_VALID
))
5932 regval
= rd32(E1000_RXSTMPL
);
5933 regval
|= (u64
)rd32(E1000_RXSTMPH
) << 32;
5936 igb_systim_to_hwtstamp(adapter
, skb_hwtstamps(skb
), regval
);
5939 static void igb_rx_vlan(struct igb_ring
*ring
,
5940 union e1000_adv_rx_desc
*rx_desc
,
5941 struct sk_buff
*skb
)
5943 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_VP
)) {
5945 if (igb_test_staterr(rx_desc
, E1000_RXDEXT_STATERR_LB
) &&
5946 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP
, &ring
->flags
))
5947 vid
= be16_to_cpu(rx_desc
->wb
.upper
.vlan
);
5949 vid
= le16_to_cpu(rx_desc
->wb
.upper
.vlan
);
5951 __vlan_hwaccel_put_tag(skb
, vid
);
5955 static inline u16
igb_get_hlen(union e1000_adv_rx_desc
*rx_desc
)
5957 /* HW will not DMA in data larger than the given buffer, even if it
5958 * parses the (NFS, of course) header to be larger. In that case, it
5959 * fills the header buffer and spills the rest into the page.
5961 u16 hlen
= (le16_to_cpu(rx_desc
->wb
.lower
.lo_dword
.hdr_info
) &
5962 E1000_RXDADV_HDRBUFLEN_MASK
) >> E1000_RXDADV_HDRBUFLEN_SHIFT
;
5963 if (hlen
> IGB_RX_HDR_LEN
)
5964 hlen
= IGB_RX_HDR_LEN
;
5968 static bool igb_clean_rx_irq(struct igb_q_vector
*q_vector
, int budget
)
5970 struct igb_ring
*rx_ring
= q_vector
->rx
.ring
;
5971 union e1000_adv_rx_desc
*rx_desc
;
5972 const int current_node
= numa_node_id();
5973 unsigned int total_bytes
= 0, total_packets
= 0;
5974 u16 cleaned_count
= igb_desc_unused(rx_ring
);
5975 u16 i
= rx_ring
->next_to_clean
;
5977 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
5979 while (igb_test_staterr(rx_desc
, E1000_RXD_STAT_DD
)) {
5980 struct igb_rx_buffer
*buffer_info
= &rx_ring
->rx_buffer_info
[i
];
5981 struct sk_buff
*skb
= buffer_info
->skb
;
5982 union e1000_adv_rx_desc
*next_rxd
;
5984 buffer_info
->skb
= NULL
;
5985 prefetch(skb
->data
);
5988 if (i
== rx_ring
->count
)
5991 next_rxd
= IGB_RX_DESC(rx_ring
, i
);
5995 * This memory barrier is needed to keep us from reading
5996 * any other fields out of the rx_desc until we know the
5997 * RXD_STAT_DD bit is set
6001 if (!skb_is_nonlinear(skb
)) {
6002 __skb_put(skb
, igb_get_hlen(rx_desc
));
6003 dma_unmap_single(rx_ring
->dev
, buffer_info
->dma
,
6006 buffer_info
->dma
= 0;
6009 if (rx_desc
->wb
.upper
.length
) {
6010 u16 length
= le16_to_cpu(rx_desc
->wb
.upper
.length
);
6012 skb_fill_page_desc(skb
, skb_shinfo(skb
)->nr_frags
,
6014 buffer_info
->page_offset
,
6018 skb
->data_len
+= length
;
6019 skb
->truesize
+= PAGE_SIZE
/ 2;
6021 if ((page_count(buffer_info
->page
) != 1) ||
6022 (page_to_nid(buffer_info
->page
) != current_node
))
6023 buffer_info
->page
= NULL
;
6025 get_page(buffer_info
->page
);
6027 dma_unmap_page(rx_ring
->dev
, buffer_info
->page_dma
,
6028 PAGE_SIZE
/ 2, DMA_FROM_DEVICE
);
6029 buffer_info
->page_dma
= 0;
6032 if (!igb_test_staterr(rx_desc
, E1000_RXD_STAT_EOP
)) {
6033 struct igb_rx_buffer
*next_buffer
;
6034 next_buffer
= &rx_ring
->rx_buffer_info
[i
];
6035 buffer_info
->skb
= next_buffer
->skb
;
6036 buffer_info
->dma
= next_buffer
->dma
;
6037 next_buffer
->skb
= skb
;
6038 next_buffer
->dma
= 0;
6042 if (igb_test_staterr(rx_desc
,
6043 E1000_RXDEXT_ERR_FRAME_ERR_MASK
)) {
6044 dev_kfree_skb_any(skb
);
6048 igb_rx_hwtstamp(q_vector
, rx_desc
, skb
);
6049 igb_rx_hash(rx_ring
, rx_desc
, skb
);
6050 igb_rx_checksum(rx_ring
, rx_desc
, skb
);
6051 igb_rx_vlan(rx_ring
, rx_desc
, skb
);
6053 total_bytes
+= skb
->len
;
6056 skb
->protocol
= eth_type_trans(skb
, rx_ring
->netdev
);
6058 napi_gro_receive(&q_vector
->napi
, skb
);
6066 /* return some buffers to hardware, one at a time is too slow */
6067 if (cleaned_count
>= IGB_RX_BUFFER_WRITE
) {
6068 igb_alloc_rx_buffers(rx_ring
, cleaned_count
);
6072 /* use prefetched values */
6076 rx_ring
->next_to_clean
= i
;
6077 u64_stats_update_begin(&rx_ring
->rx_syncp
);
6078 rx_ring
->rx_stats
.packets
+= total_packets
;
6079 rx_ring
->rx_stats
.bytes
+= total_bytes
;
6080 u64_stats_update_end(&rx_ring
->rx_syncp
);
6081 q_vector
->rx
.total_packets
+= total_packets
;
6082 q_vector
->rx
.total_bytes
+= total_bytes
;
6085 igb_alloc_rx_buffers(rx_ring
, cleaned_count
);
6090 static bool igb_alloc_mapped_skb(struct igb_ring
*rx_ring
,
6091 struct igb_rx_buffer
*bi
)
6093 struct sk_buff
*skb
= bi
->skb
;
6094 dma_addr_t dma
= bi
->dma
;
6100 skb
= netdev_alloc_skb_ip_align(rx_ring
->netdev
,
6104 rx_ring
->rx_stats
.alloc_failed
++;
6108 /* initialize skb for ring */
6109 skb_record_rx_queue(skb
, rx_ring
->queue_index
);
6112 dma
= dma_map_single(rx_ring
->dev
, skb
->data
,
6113 IGB_RX_HDR_LEN
, DMA_FROM_DEVICE
);
6115 if (dma_mapping_error(rx_ring
->dev
, dma
)) {
6116 rx_ring
->rx_stats
.alloc_failed
++;
6124 static bool igb_alloc_mapped_page(struct igb_ring
*rx_ring
,
6125 struct igb_rx_buffer
*bi
)
6127 struct page
*page
= bi
->page
;
6128 dma_addr_t page_dma
= bi
->page_dma
;
6129 unsigned int page_offset
= bi
->page_offset
^ (PAGE_SIZE
/ 2);
6135 page
= netdev_alloc_page(rx_ring
->netdev
);
6137 if (unlikely(!page
)) {
6138 rx_ring
->rx_stats
.alloc_failed
++;
6143 page_dma
= dma_map_page(rx_ring
->dev
, page
,
6144 page_offset
, PAGE_SIZE
/ 2,
6147 if (dma_mapping_error(rx_ring
->dev
, page_dma
)) {
6148 rx_ring
->rx_stats
.alloc_failed
++;
6152 bi
->page_dma
= page_dma
;
6153 bi
->page_offset
= page_offset
;
6158 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6159 * @rx_ring: Rx ring whose used receive buffers are replaced
6161 void igb_alloc_rx_buffers(struct igb_ring
*rx_ring
, u16 cleaned_count
)
6163 union e1000_adv_rx_desc
*rx_desc
;
6164 struct igb_rx_buffer
*bi
;
6165 u16 i
= rx_ring
->next_to_use
;
6167 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
6168 bi
= &rx_ring
->rx_buffer_info
[i
];
6169 i
-= rx_ring
->count
;
6171 while (cleaned_count
--) {
6172 if (!igb_alloc_mapped_skb(rx_ring
, bi
))
6175 /* Refresh the desc even if buffer_addrs didn't change
6176 * because each write-back erases this info. */
6177 rx_desc
->read
.hdr_addr
= cpu_to_le64(bi
->dma
);
6179 if (!igb_alloc_mapped_page(rx_ring
, bi
))
6182 rx_desc
->read
.pkt_addr
= cpu_to_le64(bi
->page_dma
);
6188 rx_desc
= IGB_RX_DESC(rx_ring
, 0);
6189 bi
= rx_ring
->rx_buffer_info
;
6190 i
-= rx_ring
->count
;
6193 /* clear the hdr_addr for the next_to_use descriptor */
6194 rx_desc
->read
.hdr_addr
= 0;
6197 i
+= rx_ring
->count
;
6199 if (rx_ring
->next_to_use
!= i
) {
6200 rx_ring
->next_to_use
= i
;
6202 /* Force memory writes to complete before letting h/w
6203 * know there are new descriptors to fetch. (Only
6204 * applicable for weak-ordered memory model archs,
6205 * such as IA-64). */
6207 writel(i
, rx_ring
->tail
);
6217 static int igb_mii_ioctl(struct net_device
*netdev
, struct ifreq
*ifr
, int cmd
)
6219 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6220 struct mii_ioctl_data
*data
= if_mii(ifr
);
6222 if (adapter
->hw
.phy
.media_type
!= e1000_media_type_copper
)
6227 data
->phy_id
= adapter
->hw
.phy
.addr
;
6230 if (igb_read_phy_reg(&adapter
->hw
, data
->reg_num
& 0x1F,
6242 * igb_hwtstamp_ioctl - control hardware time stamping
6247 * Outgoing time stamping can be enabled and disabled. Play nice and
6248 * disable it when requested, although it shouldn't cause any overhead
6249 * when no packet needs it. At most one packet in the queue may be
6250 * marked for time stamping, otherwise it would be impossible to tell
6251 * for sure to which packet the hardware time stamp belongs.
6253 * Incoming time stamping has to be configured via the hardware
6254 * filters. Not all combinations are supported, in particular event
6255 * type has to be specified. Matching the kind of event packet is
6256 * not supported, with the exception of "all V2 events regardless of
6260 static int igb_hwtstamp_ioctl(struct net_device
*netdev
,
6261 struct ifreq
*ifr
, int cmd
)
6263 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6264 struct e1000_hw
*hw
= &adapter
->hw
;
6265 struct hwtstamp_config config
;
6266 u32 tsync_tx_ctl
= E1000_TSYNCTXCTL_ENABLED
;
6267 u32 tsync_rx_ctl
= E1000_TSYNCRXCTL_ENABLED
;
6268 u32 tsync_rx_cfg
= 0;
6273 if (copy_from_user(&config
, ifr
->ifr_data
, sizeof(config
)))
6276 /* reserved for future extensions */
6280 switch (config
.tx_type
) {
6281 case HWTSTAMP_TX_OFF
:
6283 case HWTSTAMP_TX_ON
:
6289 switch (config
.rx_filter
) {
6290 case HWTSTAMP_FILTER_NONE
:
6293 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT
:
6294 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT
:
6295 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT
:
6296 case HWTSTAMP_FILTER_ALL
:
6298 * register TSYNCRXCFG must be set, therefore it is not
6299 * possible to time stamp both Sync and Delay_Req messages
6300 * => fall back to time stamping all packets
6302 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_ALL
;
6303 config
.rx_filter
= HWTSTAMP_FILTER_ALL
;
6305 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC
:
6306 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L4_V1
;
6307 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE
;
6310 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ
:
6311 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L4_V1
;
6312 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE
;
6315 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC
:
6316 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC
:
6317 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L2_L4_V2
;
6318 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE
;
6321 config
.rx_filter
= HWTSTAMP_FILTER_SOME
;
6323 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ
:
6324 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ
:
6325 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L2_L4_V2
;
6326 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE
;
6329 config
.rx_filter
= HWTSTAMP_FILTER_SOME
;
6331 case HWTSTAMP_FILTER_PTP_V2_EVENT
:
6332 case HWTSTAMP_FILTER_PTP_V2_SYNC
:
6333 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ
:
6334 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_EVENT_V2
;
6335 config
.rx_filter
= HWTSTAMP_FILTER_PTP_V2_EVENT
;
6343 if (hw
->mac
.type
== e1000_82575
) {
6344 if (tsync_rx_ctl
| tsync_tx_ctl
)
6350 * Per-packet timestamping only works if all packets are
6351 * timestamped, so enable timestamping in all packets as
6352 * long as one rx filter was configured.
6354 if ((hw
->mac
.type
>= e1000_82580
) && tsync_rx_ctl
) {
6355 tsync_rx_ctl
= E1000_TSYNCRXCTL_ENABLED
;
6356 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_ALL
;
6359 /* enable/disable TX */
6360 regval
= rd32(E1000_TSYNCTXCTL
);
6361 regval
&= ~E1000_TSYNCTXCTL_ENABLED
;
6362 regval
|= tsync_tx_ctl
;
6363 wr32(E1000_TSYNCTXCTL
, regval
);
6365 /* enable/disable RX */
6366 regval
= rd32(E1000_TSYNCRXCTL
);
6367 regval
&= ~(E1000_TSYNCRXCTL_ENABLED
| E1000_TSYNCRXCTL_TYPE_MASK
);
6368 regval
|= tsync_rx_ctl
;
6369 wr32(E1000_TSYNCRXCTL
, regval
);
6371 /* define which PTP packets are time stamped */
6372 wr32(E1000_TSYNCRXCFG
, tsync_rx_cfg
);
6374 /* define ethertype filter for timestamped packets */
6377 (E1000_ETQF_FILTER_ENABLE
| /* enable filter */
6378 E1000_ETQF_1588
| /* enable timestamping */
6379 ETH_P_1588
)); /* 1588 eth protocol type */
6381 wr32(E1000_ETQF(3), 0);
6383 #define PTP_PORT 319
6384 /* L4 Queue Filter[3]: filter by destination port and protocol */
6386 u32 ftqf
= (IPPROTO_UDP
/* UDP */
6387 | E1000_FTQF_VF_BP
/* VF not compared */
6388 | E1000_FTQF_1588_TIME_STAMP
/* Enable Timestamping */
6389 | E1000_FTQF_MASK
); /* mask all inputs */
6390 ftqf
&= ~E1000_FTQF_MASK_PROTO_BP
; /* enable protocol check */
6392 wr32(E1000_IMIR(3), htons(PTP_PORT
));
6393 wr32(E1000_IMIREXT(3),
6394 (E1000_IMIREXT_SIZE_BP
| E1000_IMIREXT_CTRL_BP
));
6395 if (hw
->mac
.type
== e1000_82576
) {
6396 /* enable source port check */
6397 wr32(E1000_SPQF(3), htons(PTP_PORT
));
6398 ftqf
&= ~E1000_FTQF_MASK_SOURCE_PORT_BP
;
6400 wr32(E1000_FTQF(3), ftqf
);
6402 wr32(E1000_FTQF(3), E1000_FTQF_MASK
);
6406 adapter
->hwtstamp_config
= config
;
6408 /* clear TX/RX time stamp registers, just to be sure */
6409 regval
= rd32(E1000_TXSTMPH
);
6410 regval
= rd32(E1000_RXSTMPH
);
6412 return copy_to_user(ifr
->ifr_data
, &config
, sizeof(config
)) ?
6422 static int igb_ioctl(struct net_device
*netdev
, struct ifreq
*ifr
, int cmd
)
6428 return igb_mii_ioctl(netdev
, ifr
, cmd
);
6430 return igb_hwtstamp_ioctl(netdev
, ifr
, cmd
);
6436 s32
igb_read_pcie_cap_reg(struct e1000_hw
*hw
, u32 reg
, u16
*value
)
6438 struct igb_adapter
*adapter
= hw
->back
;
6441 cap_offset
= adapter
->pdev
->pcie_cap
;
6443 return -E1000_ERR_CONFIG
;
6445 pci_read_config_word(adapter
->pdev
, cap_offset
+ reg
, value
);
6450 s32
igb_write_pcie_cap_reg(struct e1000_hw
*hw
, u32 reg
, u16
*value
)
6452 struct igb_adapter
*adapter
= hw
->back
;
6455 cap_offset
= adapter
->pdev
->pcie_cap
;
6457 return -E1000_ERR_CONFIG
;
6459 pci_write_config_word(adapter
->pdev
, cap_offset
+ reg
, *value
);
6464 static void igb_vlan_mode(struct net_device
*netdev
, u32 features
)
6466 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6467 struct e1000_hw
*hw
= &adapter
->hw
;
6469 bool enable
= !!(features
& NETIF_F_HW_VLAN_RX
);
6472 /* enable VLAN tag insert/strip */
6473 ctrl
= rd32(E1000_CTRL
);
6474 ctrl
|= E1000_CTRL_VME
;
6475 wr32(E1000_CTRL
, ctrl
);
6477 /* Disable CFI check */
6478 rctl
= rd32(E1000_RCTL
);
6479 rctl
&= ~E1000_RCTL_CFIEN
;
6480 wr32(E1000_RCTL
, rctl
);
6482 /* disable VLAN tag insert/strip */
6483 ctrl
= rd32(E1000_CTRL
);
6484 ctrl
&= ~E1000_CTRL_VME
;
6485 wr32(E1000_CTRL
, ctrl
);
6488 igb_rlpml_set(adapter
);
6491 static void igb_vlan_rx_add_vid(struct net_device
*netdev
, u16 vid
)
6493 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6494 struct e1000_hw
*hw
= &adapter
->hw
;
6495 int pf_id
= adapter
->vfs_allocated_count
;
6497 /* attempt to add filter to vlvf array */
6498 igb_vlvf_set(adapter
, vid
, true, pf_id
);
6500 /* add the filter since PF can receive vlans w/o entry in vlvf */
6501 igb_vfta_set(hw
, vid
, true);
6503 set_bit(vid
, adapter
->active_vlans
);
6506 static void igb_vlan_rx_kill_vid(struct net_device
*netdev
, u16 vid
)
6508 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6509 struct e1000_hw
*hw
= &adapter
->hw
;
6510 int pf_id
= adapter
->vfs_allocated_count
;
6513 /* remove vlan from VLVF table array */
6514 err
= igb_vlvf_set(adapter
, vid
, false, pf_id
);
6516 /* if vid was not present in VLVF just remove it from table */
6518 igb_vfta_set(hw
, vid
, false);
6520 clear_bit(vid
, adapter
->active_vlans
);
6523 static void igb_restore_vlan(struct igb_adapter
*adapter
)
6527 igb_vlan_mode(adapter
->netdev
, adapter
->netdev
->features
);
6529 for_each_set_bit(vid
, adapter
->active_vlans
, VLAN_N_VID
)
6530 igb_vlan_rx_add_vid(adapter
->netdev
, vid
);
6533 int igb_set_spd_dplx(struct igb_adapter
*adapter
, u32 spd
, u8 dplx
)
6535 struct pci_dev
*pdev
= adapter
->pdev
;
6536 struct e1000_mac_info
*mac
= &adapter
->hw
.mac
;
6540 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6541 * for the switch() below to work */
6542 if ((spd
& 1) || (dplx
& ~1))
6545 /* Fiber NICs only allow 1000 Mbps full duplex */
6546 if ((adapter
->hw
.phy
.media_type
== e1000_media_type_internal_serdes
) &&
6547 spd
!= SPEED_1000
&&
6548 dplx
!= DUPLEX_FULL
)
6551 switch (spd
+ dplx
) {
6552 case SPEED_10
+ DUPLEX_HALF
:
6553 mac
->forced_speed_duplex
= ADVERTISE_10_HALF
;
6555 case SPEED_10
+ DUPLEX_FULL
:
6556 mac
->forced_speed_duplex
= ADVERTISE_10_FULL
;
6558 case SPEED_100
+ DUPLEX_HALF
:
6559 mac
->forced_speed_duplex
= ADVERTISE_100_HALF
;
6561 case SPEED_100
+ DUPLEX_FULL
:
6562 mac
->forced_speed_duplex
= ADVERTISE_100_FULL
;
6564 case SPEED_1000
+ DUPLEX_FULL
:
6566 adapter
->hw
.phy
.autoneg_advertised
= ADVERTISE_1000_FULL
;
6568 case SPEED_1000
+ DUPLEX_HALF
: /* not supported */
6575 dev_err(&pdev
->dev
, "Unsupported Speed/Duplex configuration\n");
6579 static int __igb_shutdown(struct pci_dev
*pdev
, bool *enable_wake
)
6581 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6582 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6583 struct e1000_hw
*hw
= &adapter
->hw
;
6584 u32 ctrl
, rctl
, status
;
6585 u32 wufc
= adapter
->wol
;
6590 netif_device_detach(netdev
);
6592 if (netif_running(netdev
))
6595 igb_clear_interrupt_scheme(adapter
);
6598 retval
= pci_save_state(pdev
);
6603 status
= rd32(E1000_STATUS
);
6604 if (status
& E1000_STATUS_LU
)
6605 wufc
&= ~E1000_WUFC_LNKC
;
6608 igb_setup_rctl(adapter
);
6609 igb_set_rx_mode(netdev
);
6611 /* turn on all-multi mode if wake on multicast is enabled */
6612 if (wufc
& E1000_WUFC_MC
) {
6613 rctl
= rd32(E1000_RCTL
);
6614 rctl
|= E1000_RCTL_MPE
;
6615 wr32(E1000_RCTL
, rctl
);
6618 ctrl
= rd32(E1000_CTRL
);
6619 /* advertise wake from D3Cold */
6620 #define E1000_CTRL_ADVD3WUC 0x00100000
6621 /* phy power management enable */
6622 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6623 ctrl
|= E1000_CTRL_ADVD3WUC
;
6624 wr32(E1000_CTRL
, ctrl
);
6626 /* Allow time for pending master requests to run */
6627 igb_disable_pcie_master(hw
);
6629 wr32(E1000_WUC
, E1000_WUC_PME_EN
);
6630 wr32(E1000_WUFC
, wufc
);
6633 wr32(E1000_WUFC
, 0);
6636 *enable_wake
= wufc
|| adapter
->en_mng_pt
;
6638 igb_power_down_link(adapter
);
6640 igb_power_up_link(adapter
);
6642 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6643 * would have already happened in close and is redundant. */
6644 igb_release_hw_control(adapter
);
6646 pci_disable_device(pdev
);
6652 static int igb_suspend(struct pci_dev
*pdev
, pm_message_t state
)
6657 retval
= __igb_shutdown(pdev
, &wake
);
6662 pci_prepare_to_sleep(pdev
);
6664 pci_wake_from_d3(pdev
, false);
6665 pci_set_power_state(pdev
, PCI_D3hot
);
6671 static int igb_resume(struct pci_dev
*pdev
)
6673 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6674 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6675 struct e1000_hw
*hw
= &adapter
->hw
;
6678 pci_set_power_state(pdev
, PCI_D0
);
6679 pci_restore_state(pdev
);
6680 pci_save_state(pdev
);
6682 err
= pci_enable_device_mem(pdev
);
6685 "igb: Cannot enable PCI device from suspend\n");
6688 pci_set_master(pdev
);
6690 pci_enable_wake(pdev
, PCI_D3hot
, 0);
6691 pci_enable_wake(pdev
, PCI_D3cold
, 0);
6693 if (igb_init_interrupt_scheme(adapter
)) {
6694 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
6700 /* let the f/w know that the h/w is now under the control of the
6702 igb_get_hw_control(adapter
);
6704 wr32(E1000_WUS
, ~0);
6706 if (netif_running(netdev
)) {
6707 err
= igb_open(netdev
);
6712 netif_device_attach(netdev
);
6718 static void igb_shutdown(struct pci_dev
*pdev
)
6722 __igb_shutdown(pdev
, &wake
);
6724 if (system_state
== SYSTEM_POWER_OFF
) {
6725 pci_wake_from_d3(pdev
, wake
);
6726 pci_set_power_state(pdev
, PCI_D3hot
);
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts.  It's not called while
 * the interrupt routine is executing.
 *
 * For every queue vector the matching interrupt source is masked (per
 * vector through EIMC when MSI-X entries exist, otherwise all interrupts
 * through igb_irq_disable()) and its NAPI context is scheduled.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;	/* used by wr32() */
	int i;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		if (!adapter->msix_entries)
			igb_irq_disable(adapter);
		else
			wr32(E1000_EIMC, q_vector->eims_value);

		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
6755 * igb_io_error_detected - called when PCI error is detected
6756 * @pdev: Pointer to PCI device
6757 * @state: The current pci connection state
6759 * This function is called after a PCI bus error affecting
6760 * this device has been detected.
6762 static pci_ers_result_t
igb_io_error_detected(struct pci_dev
*pdev
,
6763 pci_channel_state_t state
)
6765 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6766 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6768 netif_device_detach(netdev
);
6770 if (state
== pci_channel_io_perm_failure
)
6771 return PCI_ERS_RESULT_DISCONNECT
;
6773 if (netif_running(netdev
))
6775 pci_disable_device(pdev
);
6777 /* Request a slot reset. */
6778 return PCI_ERS_RESULT_NEED_RESET
;
6782 * igb_io_slot_reset - called after the pci bus has been reset.
6783 * @pdev: Pointer to PCI device
6785 * Restart the card from scratch, as if from a cold-boot. Implementation
6786 * resembles the first-half of the igb_resume routine.
6788 static pci_ers_result_t
igb_io_slot_reset(struct pci_dev
*pdev
)
6790 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6791 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6792 struct e1000_hw
*hw
= &adapter
->hw
;
6793 pci_ers_result_t result
;
6796 if (pci_enable_device_mem(pdev
)) {
6798 "Cannot re-enable PCI device after reset.\n");
6799 result
= PCI_ERS_RESULT_DISCONNECT
;
6801 pci_set_master(pdev
);
6802 pci_restore_state(pdev
);
6803 pci_save_state(pdev
);
6805 pci_enable_wake(pdev
, PCI_D3hot
, 0);
6806 pci_enable_wake(pdev
, PCI_D3cold
, 0);
6809 wr32(E1000_WUS
, ~0);
6810 result
= PCI_ERS_RESULT_RECOVERED
;
6813 err
= pci_cleanup_aer_uncorrect_error_status(pdev
);
6815 dev_err(&pdev
->dev
, "pci_cleanup_aer_uncorrect_error_status "
6816 "failed 0x%0x\n", err
);
6817 /* non-fatal, continue */
6824 * igb_io_resume - called when traffic can start flowing again.
6825 * @pdev: Pointer to PCI device
6827 * This callback is called when the error recovery driver tells us that
6828 * its OK to resume normal operation. Implementation resembles the
6829 * second-half of the igb_resume routine.
6831 static void igb_io_resume(struct pci_dev
*pdev
)
6833 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6834 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6836 if (netif_running(netdev
)) {
6837 if (igb_up(adapter
)) {
6838 dev_err(&pdev
->dev
, "igb_up failed after reset\n");
6843 netif_device_attach(netdev
);
6845 /* let the f/w know that the h/w is now under the control of the
6847 igb_get_hw_control(adapter
);
6850 static void igb_rar_set_qsel(struct igb_adapter
*adapter
, u8
*addr
, u32 index
,
6853 u32 rar_low
, rar_high
;
6854 struct e1000_hw
*hw
= &adapter
->hw
;
6856 /* HW expects these in little endian so we reverse the byte order
6857 * from network order (big endian) to little endian
6859 rar_low
= ((u32
) addr
[0] | ((u32
) addr
[1] << 8) |
6860 ((u32
) addr
[2] << 16) | ((u32
) addr
[3] << 24));
6861 rar_high
= ((u32
) addr
[4] | ((u32
) addr
[5] << 8));
6863 /* Indicate to hardware the Address is Valid. */
6864 rar_high
|= E1000_RAH_AV
;
6866 if (hw
->mac
.type
== e1000_82575
)
6867 rar_high
|= E1000_RAH_POOL_1
* qsel
;
6869 rar_high
|= E1000_RAH_POOL_1
<< qsel
;
6871 wr32(E1000_RAL(index
), rar_low
);
6873 wr32(E1000_RAH(index
), rar_high
);
6877 static int igb_set_vf_mac(struct igb_adapter
*adapter
,
6878 int vf
, unsigned char *mac_addr
)
6880 struct e1000_hw
*hw
= &adapter
->hw
;
6881 /* VF MAC addresses start at end of receive addresses and moves
6882 * torwards the first, as a result a collision should not be possible */
6883 int rar_entry
= hw
->mac
.rar_entry_count
- (vf
+ 1);
6885 memcpy(adapter
->vf_data
[vf
].vf_mac_addresses
, mac_addr
, ETH_ALEN
);
6887 igb_rar_set_qsel(adapter
, mac_addr
, rar_entry
, vf
);
6892 static int igb_ndo_set_vf_mac(struct net_device
*netdev
, int vf
, u8
*mac
)
6894 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6895 if (!is_valid_ether_addr(mac
) || (vf
>= adapter
->vfs_allocated_count
))
6897 adapter
->vf_data
[vf
].flags
|= IGB_VF_FLAG_PF_SET_MAC
;
6898 dev_info(&adapter
->pdev
->dev
, "setting MAC %pM on VF %d\n", mac
, vf
);
6899 dev_info(&adapter
->pdev
->dev
, "Reload the VF driver to make this"
6900 " change effective.");
6901 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
6902 dev_warn(&adapter
->pdev
->dev
, "The VF MAC address has been set,"
6903 " but the PF device is not up.\n");
6904 dev_warn(&adapter
->pdev
->dev
, "Bring the PF device up before"
6905 " attempting to use the VF device.\n");
6907 return igb_set_vf_mac(adapter
, vf
, mac
);
6910 static int igb_link_mbps(int internal_link_speed
)
6912 switch (internal_link_speed
) {
6922 static void igb_set_vf_rate_limit(struct e1000_hw
*hw
, int vf
, int tx_rate
,
6929 /* Calculate the rate factor values to set */
6930 rf_int
= link_speed
/ tx_rate
;
6931 rf_dec
= (link_speed
- (rf_int
* tx_rate
));
6932 rf_dec
= (rf_dec
* (1<<E1000_RTTBCNRC_RF_INT_SHIFT
)) / tx_rate
;
6934 bcnrc_val
= E1000_RTTBCNRC_RS_ENA
;
6935 bcnrc_val
|= ((rf_int
<<E1000_RTTBCNRC_RF_INT_SHIFT
) &
6936 E1000_RTTBCNRC_RF_INT_MASK
);
6937 bcnrc_val
|= (rf_dec
& E1000_RTTBCNRC_RF_DEC_MASK
);
6942 wr32(E1000_RTTDQSEL
, vf
); /* vf X uses queue X */
6943 wr32(E1000_RTTBCNRC
, bcnrc_val
);
6946 static void igb_check_vf_rate_limit(struct igb_adapter
*adapter
)
6948 int actual_link_speed
, i
;
6949 bool reset_rate
= false;
6951 /* VF TX rate limit was not set or not supported */
6952 if ((adapter
->vf_rate_link_speed
== 0) ||
6953 (adapter
->hw
.mac
.type
!= e1000_82576
))
6956 actual_link_speed
= igb_link_mbps(adapter
->link_speed
);
6957 if (actual_link_speed
!= adapter
->vf_rate_link_speed
) {
6959 adapter
->vf_rate_link_speed
= 0;
6960 dev_info(&adapter
->pdev
->dev
,
6961 "Link speed has been changed. VF Transmit "
6962 "rate is disabled\n");
6965 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
6967 adapter
->vf_data
[i
].tx_rate
= 0;
6969 igb_set_vf_rate_limit(&adapter
->hw
, i
,
6970 adapter
->vf_data
[i
].tx_rate
,
6975 static int igb_ndo_set_vf_bw(struct net_device
*netdev
, int vf
, int tx_rate
)
6977 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6978 struct e1000_hw
*hw
= &adapter
->hw
;
6979 int actual_link_speed
;
6981 if (hw
->mac
.type
!= e1000_82576
)
6984 actual_link_speed
= igb_link_mbps(adapter
->link_speed
);
6985 if ((vf
>= adapter
->vfs_allocated_count
) ||
6986 (!(rd32(E1000_STATUS
) & E1000_STATUS_LU
)) ||
6987 (tx_rate
< 0) || (tx_rate
> actual_link_speed
))
6990 adapter
->vf_rate_link_speed
= actual_link_speed
;
6991 adapter
->vf_data
[vf
].tx_rate
= (u16
)tx_rate
;
6992 igb_set_vf_rate_limit(hw
, vf
, tx_rate
, actual_link_speed
);
6997 static int igb_ndo_get_vf_config(struct net_device
*netdev
,
6998 int vf
, struct ifla_vf_info
*ivi
)
7000 struct igb_adapter
*adapter
= netdev_priv(netdev
);
7001 if (vf
>= adapter
->vfs_allocated_count
)
7004 memcpy(&ivi
->mac
, adapter
->vf_data
[vf
].vf_mac_addresses
, ETH_ALEN
);
7005 ivi
->tx_rate
= adapter
->vf_data
[vf
].tx_rate
;
7006 ivi
->vlan
= adapter
->vf_data
[vf
].pf_vlan
;
7007 ivi
->qos
= adapter
->vf_data
[vf
].pf_qos
;
7011 static void igb_vmm_control(struct igb_adapter
*adapter
)
7013 struct e1000_hw
*hw
= &adapter
->hw
;
7016 switch (hw
->mac
.type
) {
7019 /* replication is not supported for 82575 */
7022 /* notify HW that the MAC is adding vlan tags */
7023 reg
= rd32(E1000_DTXCTL
);
7024 reg
|= E1000_DTXCTL_VLAN_ADDED
;
7025 wr32(E1000_DTXCTL
, reg
);
7027 /* enable replication vlan tag stripping */
7028 reg
= rd32(E1000_RPLOLR
);
7029 reg
|= E1000_RPLOLR_STRVLAN
;
7030 wr32(E1000_RPLOLR
, reg
);
7032 /* none of the above registers are supported by i350 */
7036 if (adapter
->vfs_allocated_count
) {
7037 igb_vmdq_set_loopback_pf(hw
, true);
7038 igb_vmdq_set_replication_pf(hw
, true);
7039 igb_vmdq_set_anti_spoofing_pf(hw
, true,
7040 adapter
->vfs_allocated_count
);
7042 igb_vmdq_set_loopback_pf(hw
, false);
7043 igb_vmdq_set_replication_pf(hw
, false);
7047 static void igb_init_dmac(struct igb_adapter
*adapter
, u32 pba
)
7049 struct e1000_hw
*hw
= &adapter
->hw
;
7053 if (hw
->mac
.type
> e1000_82580
) {
7054 if (adapter
->flags
& IGB_FLAG_DMAC
) {
7057 /* force threshold to 0. */
7058 wr32(E1000_DMCTXTH
, 0);
7061 * DMA Coalescing high water mark needs to be higher
7062 * than the RX threshold. set hwm to PBA - 2 * max
7065 hwm
= pba
- (2 * adapter
->max_frame_size
);
7066 reg
= rd32(E1000_DMACR
);
7067 reg
&= ~E1000_DMACR_DMACTHR_MASK
;
7070 reg
|= ((dmac_thr
<< E1000_DMACR_DMACTHR_SHIFT
)
7071 & E1000_DMACR_DMACTHR_MASK
);
7073 /* transition to L0x or L1 if available..*/
7074 reg
|= (E1000_DMACR_DMAC_EN
| E1000_DMACR_DMAC_LX_MASK
);
7076 /* watchdog timer= +-1000 usec in 32usec intervals */
7078 wr32(E1000_DMACR
, reg
);
7081 * no lower threshold to disable
7082 * coalescing(smart fifb)-UTRESH=0
7084 wr32(E1000_DMCRTRH
, 0);
7085 wr32(E1000_FCRTC
, hwm
);
7087 reg
= (IGB_DMCTLX_DCFLUSH_DIS
| 0x4);
7089 wr32(E1000_DMCTLX
, reg
);
7092 * free space in tx packet buffer to wake from
7095 wr32(E1000_DMCTXTH
, (IGB_MIN_TXPBSIZE
-
7096 (IGB_TX_BUF_4096
+ adapter
->max_frame_size
)) >> 6);
7099 * make low power state decision controlled
7102 reg
= rd32(E1000_PCIEMISC
);
7103 reg
&= ~E1000_PCIEMISC_LX_DECISION
;
7104 wr32(E1000_PCIEMISC
, reg
);
7105 } /* endif adapter->dmac is not disabled */
7106 } else if (hw
->mac
.type
== e1000_82580
) {
7107 u32 reg
= rd32(E1000_PCIEMISC
);
7108 wr32(E1000_PCIEMISC
, reg
& ~E1000_PCIEMISC_LX_DECISION
);
7109 wr32(E1000_DMACR
, 0);