1 /*******************************************************************************
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
26 *******************************************************************************/
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
57 #include <linux/dca.h>
64 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
65 __stringify(BUILD) "-k"
66 char igb_driver_name
[] = "igb";
67 char igb_driver_version
[] = DRV_VERSION
;
68 static const char igb_driver_string
[] =
69 "Intel(R) Gigabit Ethernet Network Driver";
70 static const char igb_copyright
[] = "Copyright (c) 2007-2011 Intel Corporation.";
72 static const struct e1000_info
*igb_info_tbl
[] = {
73 [board_82575
] = &e1000_82575_info
,
76 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl
) = {
77 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_COPPER
), board_82575
},
78 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_FIBER
), board_82575
},
79 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_SERDES
), board_82575
},
80 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_I350_SGMII
), board_82575
},
81 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_COPPER
), board_82575
},
82 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_FIBER
), board_82575
},
83 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_QUAD_FIBER
), board_82575
},
84 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_SERDES
), board_82575
},
85 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_SGMII
), board_82575
},
86 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82580_COPPER_DUAL
), board_82575
},
87 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SGMII
), board_82575
},
88 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SERDES
), board_82575
},
89 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_BACKPLANE
), board_82575
},
90 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_DH89XXCC_SFP
), board_82575
},
91 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576
), board_82575
},
92 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_NS
), board_82575
},
93 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_NS_SERDES
), board_82575
},
94 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_FIBER
), board_82575
},
95 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_SERDES
), board_82575
},
96 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_SERDES_QUAD
), board_82575
},
97 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_QUAD_COPPER_ET2
), board_82575
},
98 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82576_QUAD_COPPER
), board_82575
},
99 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575EB_COPPER
), board_82575
},
100 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575EB_FIBER_SERDES
), board_82575
},
101 { PCI_VDEVICE(INTEL
, E1000_DEV_ID_82575GB_QUAD_COPPER
), board_82575
},
102 /* required last entry */
106 MODULE_DEVICE_TABLE(pci
, igb_pci_tbl
);
108 void igb_reset(struct igb_adapter
*);
109 static int igb_setup_all_tx_resources(struct igb_adapter
*);
110 static int igb_setup_all_rx_resources(struct igb_adapter
*);
111 static void igb_free_all_tx_resources(struct igb_adapter
*);
112 static void igb_free_all_rx_resources(struct igb_adapter
*);
113 static void igb_setup_mrqc(struct igb_adapter
*);
114 static int igb_probe(struct pci_dev
*, const struct pci_device_id
*);
115 static void __devexit
igb_remove(struct pci_dev
*pdev
);
116 static void igb_init_hw_timer(struct igb_adapter
*adapter
);
117 static int igb_sw_init(struct igb_adapter
*);
118 static int igb_open(struct net_device
*);
119 static int igb_close(struct net_device
*);
120 static void igb_configure_tx(struct igb_adapter
*);
121 static void igb_configure_rx(struct igb_adapter
*);
122 static void igb_clean_all_tx_rings(struct igb_adapter
*);
123 static void igb_clean_all_rx_rings(struct igb_adapter
*);
124 static void igb_clean_tx_ring(struct igb_ring
*);
125 static void igb_clean_rx_ring(struct igb_ring
*);
126 static void igb_set_rx_mode(struct net_device
*);
127 static void igb_update_phy_info(unsigned long);
128 static void igb_watchdog(unsigned long);
129 static void igb_watchdog_task(struct work_struct
*);
130 static netdev_tx_t
igb_xmit_frame(struct sk_buff
*skb
, struct net_device
*);
131 static struct rtnl_link_stats64
*igb_get_stats64(struct net_device
*dev
,
132 struct rtnl_link_stats64
*stats
);
133 static int igb_change_mtu(struct net_device
*, int);
134 static int igb_set_mac(struct net_device
*, void *);
135 static void igb_set_uta(struct igb_adapter
*adapter
);
136 static irqreturn_t
igb_intr(int irq
, void *);
137 static irqreturn_t
igb_intr_msi(int irq
, void *);
138 static irqreturn_t
igb_msix_other(int irq
, void *);
139 static irqreturn_t
igb_msix_ring(int irq
, void *);
140 #ifdef CONFIG_IGB_DCA
141 static void igb_update_dca(struct igb_q_vector
*);
142 static void igb_setup_dca(struct igb_adapter
*);
143 #endif /* CONFIG_IGB_DCA */
144 static int igb_poll(struct napi_struct
*, int);
145 static bool igb_clean_tx_irq(struct igb_q_vector
*);
146 static bool igb_clean_rx_irq(struct igb_q_vector
*, int);
147 static int igb_ioctl(struct net_device
*, struct ifreq
*, int cmd
);
148 static void igb_tx_timeout(struct net_device
*);
149 static void igb_reset_task(struct work_struct
*);
150 static void igb_vlan_mode(struct net_device
*netdev
, netdev_features_t features
);
151 static int igb_vlan_rx_add_vid(struct net_device
*, u16
);
152 static int igb_vlan_rx_kill_vid(struct net_device
*, u16
);
153 static void igb_restore_vlan(struct igb_adapter
*);
154 static void igb_rar_set_qsel(struct igb_adapter
*, u8
*, u32
, u8
);
155 static void igb_ping_all_vfs(struct igb_adapter
*);
156 static void igb_msg_task(struct igb_adapter
*);
157 static void igb_vmm_control(struct igb_adapter
*);
158 static int igb_set_vf_mac(struct igb_adapter
*, int, unsigned char *);
159 static void igb_restore_vf_multicasts(struct igb_adapter
*adapter
);
160 static int igb_ndo_set_vf_mac(struct net_device
*netdev
, int vf
, u8
*mac
);
161 static int igb_ndo_set_vf_vlan(struct net_device
*netdev
,
162 int vf
, u16 vlan
, u8 qos
);
163 static int igb_ndo_set_vf_bw(struct net_device
*netdev
, int vf
, int tx_rate
);
164 static int igb_ndo_get_vf_config(struct net_device
*netdev
, int vf
,
165 struct ifla_vf_info
*ivi
);
166 static void igb_check_vf_rate_limit(struct igb_adapter
*);
168 #ifdef CONFIG_PCI_IOV
169 static int igb_vf_configure(struct igb_adapter
*adapter
, int vf
);
170 static int igb_find_enabled_vfs(struct igb_adapter
*adapter
);
171 static int igb_check_vf_assignment(struct igb_adapter
*adapter
);
175 static int igb_suspend(struct pci_dev
*, pm_message_t
);
176 static int igb_resume(struct pci_dev
*);
178 static void igb_shutdown(struct pci_dev
*);
179 #ifdef CONFIG_IGB_DCA
180 static int igb_notify_dca(struct notifier_block
*, unsigned long, void *);
181 static struct notifier_block dca_notifier
= {
182 .notifier_call
= igb_notify_dca
,
187 #ifdef CONFIG_NET_POLL_CONTROLLER
188 /* for netdump / net console */
189 static void igb_netpoll(struct net_device
*);
191 #ifdef CONFIG_PCI_IOV
192 static unsigned int max_vfs
= 0;
193 module_param(max_vfs
, uint
, 0);
194 MODULE_PARM_DESC(max_vfs
, "Maximum number of virtual functions to allocate "
195 "per physical function");
196 #endif /* CONFIG_PCI_IOV */
198 static pci_ers_result_t
igb_io_error_detected(struct pci_dev
*,
199 pci_channel_state_t
);
200 static pci_ers_result_t
igb_io_slot_reset(struct pci_dev
*);
201 static void igb_io_resume(struct pci_dev
*);
203 static struct pci_error_handlers igb_err_handler
= {
204 .error_detected
= igb_io_error_detected
,
205 .slot_reset
= igb_io_slot_reset
,
206 .resume
= igb_io_resume
,
209 static void igb_init_dmac(struct igb_adapter
*adapter
, u32 pba
);
211 static struct pci_driver igb_driver
= {
212 .name
= igb_driver_name
,
213 .id_table
= igb_pci_tbl
,
215 .remove
= __devexit_p(igb_remove
),
217 /* Power Management Hooks */
218 .suspend
= igb_suspend
,
219 .resume
= igb_resume
,
221 .shutdown
= igb_shutdown
,
222 .err_handler
= &igb_err_handler
225 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
226 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
227 MODULE_LICENSE("GPL");
228 MODULE_VERSION(DRV_VERSION
);
230 struct igb_reg_info
{
235 static const struct igb_reg_info igb_reg_info_tbl
[] = {
237 /* General Registers */
238 {E1000_CTRL
, "CTRL"},
239 {E1000_STATUS
, "STATUS"},
240 {E1000_CTRL_EXT
, "CTRL_EXT"},
242 /* Interrupt Registers */
246 {E1000_RCTL
, "RCTL"},
247 {E1000_RDLEN(0), "RDLEN"},
248 {E1000_RDH(0), "RDH"},
249 {E1000_RDT(0), "RDT"},
250 {E1000_RXDCTL(0), "RXDCTL"},
251 {E1000_RDBAL(0), "RDBAL"},
252 {E1000_RDBAH(0), "RDBAH"},
255 {E1000_TCTL
, "TCTL"},
256 {E1000_TDBAL(0), "TDBAL"},
257 {E1000_TDBAH(0), "TDBAH"},
258 {E1000_TDLEN(0), "TDLEN"},
259 {E1000_TDH(0), "TDH"},
260 {E1000_TDT(0), "TDT"},
261 {E1000_TXDCTL(0), "TXDCTL"},
262 {E1000_TDFH
, "TDFH"},
263 {E1000_TDFT
, "TDFT"},
264 {E1000_TDFHS
, "TDFHS"},
265 {E1000_TDFPC
, "TDFPC"},
267 /* List Terminator */
272 * igb_regdump - register printout routine
274 static void igb_regdump(struct e1000_hw
*hw
, struct igb_reg_info
*reginfo
)
280 switch (reginfo
->ofs
) {
282 for (n
= 0; n
< 4; n
++)
283 regs
[n
] = rd32(E1000_RDLEN(n
));
286 for (n
= 0; n
< 4; n
++)
287 regs
[n
] = rd32(E1000_RDH(n
));
290 for (n
= 0; n
< 4; n
++)
291 regs
[n
] = rd32(E1000_RDT(n
));
293 case E1000_RXDCTL(0):
294 for (n
= 0; n
< 4; n
++)
295 regs
[n
] = rd32(E1000_RXDCTL(n
));
298 for (n
= 0; n
< 4; n
++)
299 regs
[n
] = rd32(E1000_RDBAL(n
));
302 for (n
= 0; n
< 4; n
++)
303 regs
[n
] = rd32(E1000_RDBAH(n
));
306 for (n
= 0; n
< 4; n
++)
307 regs
[n
] = rd32(E1000_RDBAL(n
));
310 for (n
= 0; n
< 4; n
++)
311 regs
[n
] = rd32(E1000_TDBAH(n
));
314 for (n
= 0; n
< 4; n
++)
315 regs
[n
] = rd32(E1000_TDLEN(n
));
318 for (n
= 0; n
< 4; n
++)
319 regs
[n
] = rd32(E1000_TDH(n
));
322 for (n
= 0; n
< 4; n
++)
323 regs
[n
] = rd32(E1000_TDT(n
));
325 case E1000_TXDCTL(0):
326 for (n
= 0; n
< 4; n
++)
327 regs
[n
] = rd32(E1000_TXDCTL(n
));
330 pr_info("%-15s %08x\n", reginfo
->name
, rd32(reginfo
->ofs
));
334 snprintf(rname
, 16, "%s%s", reginfo
->name
, "[0-3]");
335 pr_info("%-15s %08x %08x %08x %08x\n", rname
, regs
[0], regs
[1],
340 * igb_dump - Print registers, tx-rings and rx-rings
342 static void igb_dump(struct igb_adapter
*adapter
)
344 struct net_device
*netdev
= adapter
->netdev
;
345 struct e1000_hw
*hw
= &adapter
->hw
;
346 struct igb_reg_info
*reginfo
;
347 struct igb_ring
*tx_ring
;
348 union e1000_adv_tx_desc
*tx_desc
;
349 struct my_u0
{ u64 a
; u64 b
; } *u0
;
350 struct igb_ring
*rx_ring
;
351 union e1000_adv_rx_desc
*rx_desc
;
355 if (!netif_msg_hw(adapter
))
358 /* Print netdevice Info */
360 dev_info(&adapter
->pdev
->dev
, "Net device Info\n");
361 pr_info("Device Name state trans_start "
363 pr_info("%-15s %016lX %016lX %016lX\n", netdev
->name
,
364 netdev
->state
, netdev
->trans_start
, netdev
->last_rx
);
367 /* Print Registers */
368 dev_info(&adapter
->pdev
->dev
, "Register Dump\n");
369 pr_info(" Register Name Value\n");
370 for (reginfo
= (struct igb_reg_info
*)igb_reg_info_tbl
;
371 reginfo
->name
; reginfo
++) {
372 igb_regdump(hw
, reginfo
);
375 /* Print TX Ring Summary */
376 if (!netdev
|| !netif_running(netdev
))
379 dev_info(&adapter
->pdev
->dev
, "TX Rings Summary\n");
380 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
381 for (n
= 0; n
< adapter
->num_tx_queues
; n
++) {
382 struct igb_tx_buffer
*buffer_info
;
383 tx_ring
= adapter
->tx_ring
[n
];
384 buffer_info
= &tx_ring
->tx_buffer_info
[tx_ring
->next_to_clean
];
385 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
386 n
, tx_ring
->next_to_use
, tx_ring
->next_to_clean
,
387 (u64
)buffer_info
->dma
,
389 buffer_info
->next_to_watch
,
390 (u64
)buffer_info
->time_stamp
);
394 if (!netif_msg_tx_done(adapter
))
395 goto rx_ring_summary
;
397 dev_info(&adapter
->pdev
->dev
, "TX Rings Dump\n");
399 /* Transmit Descriptor Formats
401 * Advanced Transmit Descriptor
402 * +--------------------------------------------------------------+
403 * 0 | Buffer Address [63:0] |
404 * +--------------------------------------------------------------+
405 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
406 * +--------------------------------------------------------------+
407 * 63 46 45 40 39 38 36 35 32 31 24 15 0
410 for (n
= 0; n
< adapter
->num_tx_queues
; n
++) {
411 tx_ring
= adapter
->tx_ring
[n
];
412 pr_info("------------------------------------\n");
413 pr_info("TX QUEUE INDEX = %d\n", tx_ring
->queue_index
);
414 pr_info("------------------------------------\n");
415 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
416 "[bi->dma ] leng ntw timestamp "
419 for (i
= 0; tx_ring
->desc
&& (i
< tx_ring
->count
); i
++) {
420 const char *next_desc
;
421 struct igb_tx_buffer
*buffer_info
;
422 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
423 buffer_info
= &tx_ring
->tx_buffer_info
[i
];
424 u0
= (struct my_u0
*)tx_desc
;
425 if (i
== tx_ring
->next_to_use
&&
426 i
== tx_ring
->next_to_clean
)
427 next_desc
= " NTC/U";
428 else if (i
== tx_ring
->next_to_use
)
430 else if (i
== tx_ring
->next_to_clean
)
435 pr_info("T [0x%03X] %016llX %016llX %016llX"
436 " %04X %p %016llX %p%s\n", i
,
439 (u64
)buffer_info
->dma
,
441 buffer_info
->next_to_watch
,
442 (u64
)buffer_info
->time_stamp
,
443 buffer_info
->skb
, next_desc
);
445 if (netif_msg_pktdata(adapter
) && buffer_info
->dma
!= 0)
446 print_hex_dump(KERN_INFO
, "",
448 16, 1, phys_to_virt(buffer_info
->dma
),
449 buffer_info
->length
, true);
453 /* Print RX Rings Summary */
455 dev_info(&adapter
->pdev
->dev
, "RX Rings Summary\n");
456 pr_info("Queue [NTU] [NTC]\n");
457 for (n
= 0; n
< adapter
->num_rx_queues
; n
++) {
458 rx_ring
= adapter
->rx_ring
[n
];
459 pr_info(" %5d %5X %5X\n",
460 n
, rx_ring
->next_to_use
, rx_ring
->next_to_clean
);
464 if (!netif_msg_rx_status(adapter
))
467 dev_info(&adapter
->pdev
->dev
, "RX Rings Dump\n");
469 /* Advanced Receive Descriptor (Read) Format
471 * +-----------------------------------------------------+
472 * 0 | Packet Buffer Address [63:1] |A0/NSE|
473 * +----------------------------------------------+------+
474 * 8 | Header Buffer Address [63:1] | DD |
475 * +-----------------------------------------------------+
478 * Advanced Receive Descriptor (Write-Back) Format
480 * 63 48 47 32 31 30 21 20 17 16 4 3 0
481 * +------------------------------------------------------+
482 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
483 * | Checksum Ident | | | | Type | Type |
484 * +------------------------------------------------------+
485 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
486 * +------------------------------------------------------+
487 * 63 48 47 32 31 20 19 0
490 for (n
= 0; n
< adapter
->num_rx_queues
; n
++) {
491 rx_ring
= adapter
->rx_ring
[n
];
492 pr_info("------------------------------------\n");
493 pr_info("RX QUEUE INDEX = %d\n", rx_ring
->queue_index
);
494 pr_info("------------------------------------\n");
495 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
496 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
497 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
498 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
500 for (i
= 0; i
< rx_ring
->count
; i
++) {
501 const char *next_desc
;
502 struct igb_rx_buffer
*buffer_info
;
503 buffer_info
= &rx_ring
->rx_buffer_info
[i
];
504 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
505 u0
= (struct my_u0
*)rx_desc
;
506 staterr
= le32_to_cpu(rx_desc
->wb
.upper
.status_error
);
508 if (i
== rx_ring
->next_to_use
)
510 else if (i
== rx_ring
->next_to_clean
)
515 if (staterr
& E1000_RXD_STAT_DD
) {
516 /* Descriptor Done */
517 pr_info("%s[0x%03X] %016llX %016llX -------"
518 "--------- %p%s\n", "RWB", i
,
521 buffer_info
->skb
, next_desc
);
523 pr_info("%s[0x%03X] %016llX %016llX %016llX"
527 (u64
)buffer_info
->dma
,
528 buffer_info
->skb
, next_desc
);
530 if (netif_msg_pktdata(adapter
)) {
531 print_hex_dump(KERN_INFO
, "",
534 phys_to_virt(buffer_info
->dma
),
535 IGB_RX_HDR_LEN
, true);
536 print_hex_dump(KERN_INFO
, "",
540 buffer_info
->page_dma
+
541 buffer_info
->page_offset
),
554 * igb_read_clock - read raw cycle counter (to be used by time counter)
556 static cycle_t
igb_read_clock(const struct cyclecounter
*tc
)
558 struct igb_adapter
*adapter
=
559 container_of(tc
, struct igb_adapter
, cycles
);
560 struct e1000_hw
*hw
= &adapter
->hw
;
565 * The timestamp latches on lowest register read. For the 82580
566 * the lowest register is SYSTIMR instead of SYSTIML. However we never
567 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
569 if (hw
->mac
.type
>= e1000_82580
) {
570 stamp
= rd32(E1000_SYSTIMR
) >> 8;
571 shift
= IGB_82580_TSYNC_SHIFT
;
574 stamp
|= (u64
)rd32(E1000_SYSTIML
) << shift
;
575 stamp
|= (u64
)rd32(E1000_SYSTIMH
) << (shift
+ 32);
580 * igb_get_hw_dev - return device
581 * used by hardware layer to print debugging information
583 struct net_device
*igb_get_hw_dev(struct e1000_hw
*hw
)
585 struct igb_adapter
*adapter
= hw
->back
;
586 return adapter
->netdev
;
590 * igb_init_module - Driver Registration Routine
592 * igb_init_module is the first routine called when the driver is
593 * loaded. All it does is register with the PCI subsystem.
595 static int __init
igb_init_module(void)
598 pr_info("%s - version %s\n",
599 igb_driver_string
, igb_driver_version
);
601 pr_info("%s\n", igb_copyright
);
603 #ifdef CONFIG_IGB_DCA
604 dca_register_notify(&dca_notifier
);
606 ret
= pci_register_driver(&igb_driver
);
610 module_init(igb_init_module
);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory; it unhooks the DCA notifier (when compiled in) and
 * unregisters from the PCI subsystem.
 */
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}
626 module_exit(igb_exit_module
);
628 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
630 * igb_cache_ring_register - Descriptor ring to register mapping
631 * @adapter: board private structure to initialize
633 * Once we know the feature-set enabled for the device, we'll cache
634 * the register offset the descriptor ring is assigned to.
636 static void igb_cache_ring_register(struct igb_adapter
*adapter
)
639 u32 rbase_offset
= adapter
->vfs_allocated_count
;
641 switch (adapter
->hw
.mac
.type
) {
643 /* The queues are allocated for virtualization such that VF 0
644 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
645 * In order to avoid collision we start at the first free queue
646 * and continue consuming queues in the same sequence
648 if (adapter
->vfs_allocated_count
) {
649 for (; i
< adapter
->rss_queues
; i
++)
650 adapter
->rx_ring
[i
]->reg_idx
= rbase_offset
+
657 for (; i
< adapter
->num_rx_queues
; i
++)
658 adapter
->rx_ring
[i
]->reg_idx
= rbase_offset
+ i
;
659 for (; j
< adapter
->num_tx_queues
; j
++)
660 adapter
->tx_ring
[j
]->reg_idx
= rbase_offset
+ j
;
665 static void igb_free_queues(struct igb_adapter
*adapter
)
669 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
670 kfree(adapter
->tx_ring
[i
]);
671 adapter
->tx_ring
[i
] = NULL
;
673 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
674 kfree(adapter
->rx_ring
[i
]);
675 adapter
->rx_ring
[i
] = NULL
;
677 adapter
->num_rx_queues
= 0;
678 adapter
->num_tx_queues
= 0;
682 * igb_alloc_queues - Allocate memory for all rings
683 * @adapter: board private structure to initialize
685 * We allocate one ring per queue at run-time since we don't know the
686 * number of queues at compile-time.
688 static int igb_alloc_queues(struct igb_adapter
*adapter
)
690 struct igb_ring
*ring
;
692 int orig_node
= adapter
->node
;
694 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
695 if (orig_node
== -1) {
696 int cur_node
= next_online_node(adapter
->node
);
697 if (cur_node
== MAX_NUMNODES
)
698 cur_node
= first_online_node
;
699 adapter
->node
= cur_node
;
701 ring
= kzalloc_node(sizeof(struct igb_ring
), GFP_KERNEL
,
704 ring
= kzalloc(sizeof(struct igb_ring
), GFP_KERNEL
);
707 ring
->count
= adapter
->tx_ring_count
;
708 ring
->queue_index
= i
;
709 ring
->dev
= &adapter
->pdev
->dev
;
710 ring
->netdev
= adapter
->netdev
;
711 ring
->numa_node
= adapter
->node
;
712 /* For 82575, context index must be unique per ring. */
713 if (adapter
->hw
.mac
.type
== e1000_82575
)
714 set_bit(IGB_RING_FLAG_TX_CTX_IDX
, &ring
->flags
);
715 adapter
->tx_ring
[i
] = ring
;
717 /* Restore the adapter's original node */
718 adapter
->node
= orig_node
;
720 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
721 if (orig_node
== -1) {
722 int cur_node
= next_online_node(adapter
->node
);
723 if (cur_node
== MAX_NUMNODES
)
724 cur_node
= first_online_node
;
725 adapter
->node
= cur_node
;
727 ring
= kzalloc_node(sizeof(struct igb_ring
), GFP_KERNEL
,
730 ring
= kzalloc(sizeof(struct igb_ring
), GFP_KERNEL
);
733 ring
->count
= adapter
->rx_ring_count
;
734 ring
->queue_index
= i
;
735 ring
->dev
= &adapter
->pdev
->dev
;
736 ring
->netdev
= adapter
->netdev
;
737 ring
->numa_node
= adapter
->node
;
738 /* set flag indicating ring supports SCTP checksum offload */
739 if (adapter
->hw
.mac
.type
>= e1000_82576
)
740 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM
, &ring
->flags
);
742 /* On i350, loopback VLAN packets have the tag byte-swapped. */
743 if (adapter
->hw
.mac
.type
== e1000_i350
)
744 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP
, &ring
->flags
);
746 adapter
->rx_ring
[i
] = ring
;
748 /* Restore the adapter's original node */
749 adapter
->node
= orig_node
;
751 igb_cache_ring_register(adapter
);
756 /* Restore the adapter's original node */
757 adapter
->node
= orig_node
;
758 igb_free_queues(adapter
);
764 * igb_write_ivar - configure ivar for given MSI-X vector
765 * @hw: pointer to the HW structure
766 * @msix_vector: vector number we are allocating to a given ring
767 * @index: row index of IVAR register to write within IVAR table
768 * @offset: column offset of in IVAR, should be multiple of 8
770 * This function is intended to handle the writing of the IVAR register
771 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
772 * each containing an cause allocation for an Rx and Tx ring, and a
773 * variable number of rows depending on the number of queues supported.
775 static void igb_write_ivar(struct e1000_hw
*hw
, int msix_vector
,
776 int index
, int offset
)
778 u32 ivar
= array_rd32(E1000_IVAR0
, index
);
780 /* clear any bits that are currently set */
781 ivar
&= ~((u32
)0xFF << offset
);
783 /* write vector and valid bit */
784 ivar
|= (msix_vector
| E1000_IVAR_VALID
) << offset
;
786 array_wr32(E1000_IVAR0
, index
, ivar
);
789 #define IGB_N0_QUEUE -1
790 static void igb_assign_vector(struct igb_q_vector
*q_vector
, int msix_vector
)
792 struct igb_adapter
*adapter
= q_vector
->adapter
;
793 struct e1000_hw
*hw
= &adapter
->hw
;
794 int rx_queue
= IGB_N0_QUEUE
;
795 int tx_queue
= IGB_N0_QUEUE
;
798 if (q_vector
->rx
.ring
)
799 rx_queue
= q_vector
->rx
.ring
->reg_idx
;
800 if (q_vector
->tx
.ring
)
801 tx_queue
= q_vector
->tx
.ring
->reg_idx
;
803 switch (hw
->mac
.type
) {
805 /* The 82575 assigns vectors using a bitmask, which matches the
806 bitmask for the EICR/EIMS/EIMC registers. To assign one
807 or more queues to a vector, we write the appropriate bits
808 into the MSIXBM register for that vector. */
809 if (rx_queue
> IGB_N0_QUEUE
)
810 msixbm
= E1000_EICR_RX_QUEUE0
<< rx_queue
;
811 if (tx_queue
> IGB_N0_QUEUE
)
812 msixbm
|= E1000_EICR_TX_QUEUE0
<< tx_queue
;
813 if (!adapter
->msix_entries
&& msix_vector
== 0)
814 msixbm
|= E1000_EIMS_OTHER
;
815 array_wr32(E1000_MSIXBM(0), msix_vector
, msixbm
);
816 q_vector
->eims_value
= msixbm
;
820 * 82576 uses a table that essentially consists of 2 columns
821 * with 8 rows. The ordering is column-major so we use the
822 * lower 3 bits as the row index, and the 4th bit as the
825 if (rx_queue
> IGB_N0_QUEUE
)
826 igb_write_ivar(hw
, msix_vector
,
828 (rx_queue
& 0x8) << 1);
829 if (tx_queue
> IGB_N0_QUEUE
)
830 igb_write_ivar(hw
, msix_vector
,
832 ((tx_queue
& 0x8) << 1) + 8);
833 q_vector
->eims_value
= 1 << msix_vector
;
838 * On 82580 and newer adapters the scheme is similar to 82576
839 * however instead of ordering column-major we have things
840 * ordered row-major. So we traverse the table by using
841 * bit 0 as the column offset, and the remaining bits as the
844 if (rx_queue
> IGB_N0_QUEUE
)
845 igb_write_ivar(hw
, msix_vector
,
847 (rx_queue
& 0x1) << 4);
848 if (tx_queue
> IGB_N0_QUEUE
)
849 igb_write_ivar(hw
, msix_vector
,
851 ((tx_queue
& 0x1) << 4) + 8);
852 q_vector
->eims_value
= 1 << msix_vector
;
859 /* add q_vector eims value to global eims_enable_mask */
860 adapter
->eims_enable_mask
|= q_vector
->eims_value
;
862 /* configure q_vector to set itr on first interrupt */
863 q_vector
->set_itr
= 1;
867 * igb_configure_msix - Configure MSI-X hardware
869 * igb_configure_msix sets up the hardware to properly
870 * generate MSI-X interrupts.
872 static void igb_configure_msix(struct igb_adapter
*adapter
)
876 struct e1000_hw
*hw
= &adapter
->hw
;
878 adapter
->eims_enable_mask
= 0;
880 /* set vector for other causes, i.e. link changes */
881 switch (hw
->mac
.type
) {
883 tmp
= rd32(E1000_CTRL_EXT
);
884 /* enable MSI-X PBA support*/
885 tmp
|= E1000_CTRL_EXT_PBA_CLR
;
887 /* Auto-Mask interrupts upon ICR read. */
888 tmp
|= E1000_CTRL_EXT_EIAME
;
889 tmp
|= E1000_CTRL_EXT_IRCA
;
891 wr32(E1000_CTRL_EXT
, tmp
);
893 /* enable msix_other interrupt */
894 array_wr32(E1000_MSIXBM(0), vector
++,
896 adapter
->eims_other
= E1000_EIMS_OTHER
;
903 /* Turn on MSI-X capability first, or our settings
904 * won't stick. And it will take days to debug. */
905 wr32(E1000_GPIE
, E1000_GPIE_MSIX_MODE
|
906 E1000_GPIE_PBA
| E1000_GPIE_EIAME
|
909 /* enable msix_other interrupt */
910 adapter
->eims_other
= 1 << vector
;
911 tmp
= (vector
++ | E1000_IVAR_VALID
) << 8;
913 wr32(E1000_IVAR_MISC
, tmp
);
916 /* do nothing, since nothing else supports MSI-X */
918 } /* switch (hw->mac.type) */
920 adapter
->eims_enable_mask
|= adapter
->eims_other
;
922 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
923 igb_assign_vector(adapter
->q_vector
[i
], vector
++);
929 * igb_request_msix - Initialize MSI-X interrupts
931 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
934 static int igb_request_msix(struct igb_adapter
*adapter
)
936 struct net_device
*netdev
= adapter
->netdev
;
937 struct e1000_hw
*hw
= &adapter
->hw
;
938 int i
, err
= 0, vector
= 0;
940 err
= request_irq(adapter
->msix_entries
[vector
].vector
,
941 igb_msix_other
, 0, netdev
->name
, adapter
);
946 for (i
= 0; i
< adapter
->num_q_vectors
; i
++) {
947 struct igb_q_vector
*q_vector
= adapter
->q_vector
[i
];
949 q_vector
->itr_register
= hw
->hw_addr
+ E1000_EITR(vector
);
951 if (q_vector
->rx
.ring
&& q_vector
->tx
.ring
)
952 sprintf(q_vector
->name
, "%s-TxRx-%u", netdev
->name
,
953 q_vector
->rx
.ring
->queue_index
);
954 else if (q_vector
->tx
.ring
)
955 sprintf(q_vector
->name
, "%s-tx-%u", netdev
->name
,
956 q_vector
->tx
.ring
->queue_index
);
957 else if (q_vector
->rx
.ring
)
958 sprintf(q_vector
->name
, "%s-rx-%u", netdev
->name
,
959 q_vector
->rx
.ring
->queue_index
);
961 sprintf(q_vector
->name
, "%s-unused", netdev
->name
);
963 err
= request_irq(adapter
->msix_entries
[vector
].vector
,
964 igb_msix_ring
, 0, q_vector
->name
,
971 igb_configure_msix(adapter
);
/* igb_reset_interrupt_capability - return the adapter to legacy interrupts.
 * If MSI-X was enabled, disable it and free the vector-entry table;
 * otherwise disable plain MSI when it had been turned on.
 */
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;	/* guard against double free */
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
989 * igb_free_q_vectors - Free memory allocated for interrupt vectors
990 * @adapter: board private structure to initialize
992 * This function frees the memory allocated to the q_vectors. In addition if
993 * NAPI is enabled it will delete any references to the NAPI struct prior
994 * to freeing the q_vector.
996 static void igb_free_q_vectors(struct igb_adapter
*adapter
)
1000 for (v_idx
= 0; v_idx
< adapter
->num_q_vectors
; v_idx
++) {
1001 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1002 adapter
->q_vector
[v_idx
] = NULL
;
1005 netif_napi_del(&q_vector
->napi
);
1008 adapter
->num_q_vectors
= 0;
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.  Order matters: rings first, then the
 * q_vectors that referenced them, then the MSI/MSI-X capability itself.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
1025 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1027 * Attempt to configure interrupts using the best available
1028 * capabilities of the hardware and kernel.
1030 static int igb_set_interrupt_capability(struct igb_adapter
*adapter
)
1035 /* Number of supported queues. */
1036 adapter
->num_rx_queues
= adapter
->rss_queues
;
1037 if (adapter
->vfs_allocated_count
)
1038 adapter
->num_tx_queues
= 1;
1040 adapter
->num_tx_queues
= adapter
->rss_queues
;
1042 /* start with one vector for every rx queue */
1043 numvecs
= adapter
->num_rx_queues
;
1045 /* if tx handler is separate add 1 for every tx queue */
1046 if (!(adapter
->flags
& IGB_FLAG_QUEUE_PAIRS
))
1047 numvecs
+= adapter
->num_tx_queues
;
1049 /* store the number of vectors reserved for queues */
1050 adapter
->num_q_vectors
= numvecs
;
1052 /* add 1 vector for link status interrupts */
1054 adapter
->msix_entries
= kcalloc(numvecs
, sizeof(struct msix_entry
),
1056 if (!adapter
->msix_entries
)
1059 for (i
= 0; i
< numvecs
; i
++)
1060 adapter
->msix_entries
[i
].entry
= i
;
1062 err
= pci_enable_msix(adapter
->pdev
,
1063 adapter
->msix_entries
,
1068 igb_reset_interrupt_capability(adapter
);
1070 /* If we can't do MSI-X, try MSI */
1072 #ifdef CONFIG_PCI_IOV
1073 /* disable SR-IOV for non MSI-X configurations */
1074 if (adapter
->vf_data
) {
1075 struct e1000_hw
*hw
= &adapter
->hw
;
1076 /* disable iov and allow time for transactions to clear */
1077 pci_disable_sriov(adapter
->pdev
);
1080 kfree(adapter
->vf_data
);
1081 adapter
->vf_data
= NULL
;
1082 wr32(E1000_IOVCTL
, E1000_IOVCTL_REUSE_VFQ
);
1085 dev_info(&adapter
->pdev
->dev
, "IOV Disabled\n");
1088 adapter
->vfs_allocated_count
= 0;
1089 adapter
->rss_queues
= 1;
1090 adapter
->flags
|= IGB_FLAG_QUEUE_PAIRS
;
1091 adapter
->num_rx_queues
= 1;
1092 adapter
->num_tx_queues
= 1;
1093 adapter
->num_q_vectors
= 1;
1094 if (!pci_enable_msi(adapter
->pdev
))
1095 adapter
->flags
|= IGB_FLAG_HAS_MSI
;
1097 /* Notify the stack of the (possibly) reduced queue counts. */
1098 netif_set_real_num_tx_queues(adapter
->netdev
, adapter
->num_tx_queues
);
1099 return netif_set_real_num_rx_queues(adapter
->netdev
,
1100 adapter
->num_rx_queues
);
1104 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1105 * @adapter: board private structure to initialize
1107 * We allocate one q_vector per queue interrupt. If allocation fails we
1110 static int igb_alloc_q_vectors(struct igb_adapter
*adapter
)
1112 struct igb_q_vector
*q_vector
;
1113 struct e1000_hw
*hw
= &adapter
->hw
;
1115 int orig_node
= adapter
->node
;
1117 for (v_idx
= 0; v_idx
< adapter
->num_q_vectors
; v_idx
++) {
1118 if ((adapter
->num_q_vectors
== (adapter
->num_rx_queues
+
1119 adapter
->num_tx_queues
)) &&
1120 (adapter
->num_rx_queues
== v_idx
))
1121 adapter
->node
= orig_node
;
1122 if (orig_node
== -1) {
1123 int cur_node
= next_online_node(adapter
->node
);
1124 if (cur_node
== MAX_NUMNODES
)
1125 cur_node
= first_online_node
;
1126 adapter
->node
= cur_node
;
1128 q_vector
= kzalloc_node(sizeof(struct igb_q_vector
), GFP_KERNEL
,
1131 q_vector
= kzalloc(sizeof(struct igb_q_vector
),
1135 q_vector
->adapter
= adapter
;
1136 q_vector
->itr_register
= hw
->hw_addr
+ E1000_EITR(0);
1137 q_vector
->itr_val
= IGB_START_ITR
;
1138 netif_napi_add(adapter
->netdev
, &q_vector
->napi
, igb_poll
, 64);
1139 adapter
->q_vector
[v_idx
] = q_vector
;
1141 /* Restore the adapter's original node */
1142 adapter
->node
= orig_node
;
1147 /* Restore the adapter's original node */
1148 adapter
->node
= orig_node
;
1149 igb_free_q_vectors(adapter
);
1153 static void igb_map_rx_ring_to_vector(struct igb_adapter
*adapter
,
1154 int ring_idx
, int v_idx
)
1156 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1158 q_vector
->rx
.ring
= adapter
->rx_ring
[ring_idx
];
1159 q_vector
->rx
.ring
->q_vector
= q_vector
;
1160 q_vector
->rx
.count
++;
1161 q_vector
->itr_val
= adapter
->rx_itr_setting
;
1162 if (q_vector
->itr_val
&& q_vector
->itr_val
<= 3)
1163 q_vector
->itr_val
= IGB_START_ITR
;
1166 static void igb_map_tx_ring_to_vector(struct igb_adapter
*adapter
,
1167 int ring_idx
, int v_idx
)
1169 struct igb_q_vector
*q_vector
= adapter
->q_vector
[v_idx
];
1171 q_vector
->tx
.ring
= adapter
->tx_ring
[ring_idx
];
1172 q_vector
->tx
.ring
->q_vector
= q_vector
;
1173 q_vector
->tx
.count
++;
1174 q_vector
->itr_val
= adapter
->tx_itr_setting
;
1175 q_vector
->tx
.work_limit
= adapter
->tx_work_limit
;
1176 if (q_vector
->itr_val
&& q_vector
->itr_val
<= 3)
1177 q_vector
->itr_val
= IGB_START_ITR
;
1181 * igb_map_ring_to_vector - maps allocated queues to vectors
1183 * This function maps the recently allocated queues to vectors.
1185 static int igb_map_ring_to_vector(struct igb_adapter
*adapter
)
1190 if ((adapter
->num_q_vectors
< adapter
->num_rx_queues
) ||
1191 (adapter
->num_q_vectors
< adapter
->num_tx_queues
))
1194 if (adapter
->num_q_vectors
>=
1195 (adapter
->num_rx_queues
+ adapter
->num_tx_queues
)) {
1196 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
1197 igb_map_rx_ring_to_vector(adapter
, i
, v_idx
++);
1198 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
1199 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
++);
1201 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1202 if (i
< adapter
->num_tx_queues
)
1203 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
);
1204 igb_map_rx_ring_to_vector(adapter
, i
, v_idx
++);
1206 for (; i
< adapter
->num_tx_queues
; i
++)
1207 igb_map_tx_ring_to_vector(adapter
, i
, v_idx
++);
1213 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1215 * This function initializes the interrupts and allocates all of the queues.
1217 static int igb_init_interrupt_scheme(struct igb_adapter
*adapter
)
1219 struct pci_dev
*pdev
= adapter
->pdev
;
1222 err
= igb_set_interrupt_capability(adapter
);
1226 err
= igb_alloc_q_vectors(adapter
);
1228 dev_err(&pdev
->dev
, "Unable to allocate memory for vectors\n");
1229 goto err_alloc_q_vectors
;
1232 err
= igb_alloc_queues(adapter
);
1234 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
1235 goto err_alloc_queues
;
1238 err
= igb_map_ring_to_vector(adapter
);
1240 dev_err(&pdev
->dev
, "Invalid q_vector to ring mapping\n");
1241 goto err_map_queues
;
1247 igb_free_queues(adapter
);
1249 igb_free_q_vectors(adapter
);
1250 err_alloc_q_vectors
:
1251 igb_reset_interrupt_capability(adapter
);
1256 * igb_request_irq - initialize interrupts
1258 * Attempts to configure interrupts using the best available
1259 * capabilities of the hardware and kernel.
1261 static int igb_request_irq(struct igb_adapter
*adapter
)
1263 struct net_device
*netdev
= adapter
->netdev
;
1264 struct pci_dev
*pdev
= adapter
->pdev
;
1267 if (adapter
->msix_entries
) {
1268 err
= igb_request_msix(adapter
);
1271 /* fall back to MSI */
1272 igb_clear_interrupt_scheme(adapter
);
1273 if (!pci_enable_msi(pdev
))
1274 adapter
->flags
|= IGB_FLAG_HAS_MSI
;
1275 igb_free_all_tx_resources(adapter
);
1276 igb_free_all_rx_resources(adapter
);
1277 adapter
->num_tx_queues
= 1;
1278 adapter
->num_rx_queues
= 1;
1279 adapter
->num_q_vectors
= 1;
1280 err
= igb_alloc_q_vectors(adapter
);
1283 "Unable to allocate memory for vectors\n");
1286 err
= igb_alloc_queues(adapter
);
1289 "Unable to allocate memory for queues\n");
1290 igb_free_q_vectors(adapter
);
1293 igb_setup_all_tx_resources(adapter
);
1294 igb_setup_all_rx_resources(adapter
);
1297 igb_assign_vector(adapter
->q_vector
[0], 0);
1299 if (adapter
->flags
& IGB_FLAG_HAS_MSI
) {
1300 err
= request_irq(pdev
->irq
, igb_intr_msi
, 0,
1301 netdev
->name
, adapter
);
1305 /* fall back to legacy interrupts */
1306 igb_reset_interrupt_capability(adapter
);
1307 adapter
->flags
&= ~IGB_FLAG_HAS_MSI
;
1310 err
= request_irq(pdev
->irq
, igb_intr
, IRQF_SHARED
,
1311 netdev
->name
, adapter
);
1314 dev_err(&pdev
->dev
, "Error %d getting interrupt\n",
1321 static void igb_free_irq(struct igb_adapter
*adapter
)
1323 if (adapter
->msix_entries
) {
1326 free_irq(adapter
->msix_entries
[vector
++].vector
, adapter
);
1328 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1329 free_irq(adapter
->msix_entries
[vector
++].vector
,
1330 adapter
->q_vector
[i
]);
1332 free_irq(adapter
->pdev
->irq
, adapter
);
1337 * igb_irq_disable - Mask off interrupt generation on the NIC
1338 * @adapter: board private structure
1340 static void igb_irq_disable(struct igb_adapter
*adapter
)
1342 struct e1000_hw
*hw
= &adapter
->hw
;
1345 * we need to be careful when disabling interrupts. The VFs are also
1346 * mapped into these registers and so clearing the bits can cause
1347 * issues on the VF drivers so we only need to clear what we set
1349 if (adapter
->msix_entries
) {
1350 u32 regval
= rd32(E1000_EIAM
);
1351 wr32(E1000_EIAM
, regval
& ~adapter
->eims_enable_mask
);
1352 wr32(E1000_EIMC
, adapter
->eims_enable_mask
);
1353 regval
= rd32(E1000_EIAC
);
1354 wr32(E1000_EIAC
, regval
& ~adapter
->eims_enable_mask
);
1358 wr32(E1000_IMC
, ~0);
1360 if (adapter
->msix_entries
) {
1362 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1363 synchronize_irq(adapter
->msix_entries
[i
].vector
);
1365 synchronize_irq(adapter
->pdev
->irq
);
1370 * igb_irq_enable - Enable default interrupt generation settings
1371 * @adapter: board private structure
1373 static void igb_irq_enable(struct igb_adapter
*adapter
)
1375 struct e1000_hw
*hw
= &adapter
->hw
;
1377 if (adapter
->msix_entries
) {
1378 u32 ims
= E1000_IMS_LSC
| E1000_IMS_DOUTSYNC
| E1000_IMS_DRSTA
;
1379 u32 regval
= rd32(E1000_EIAC
);
1380 wr32(E1000_EIAC
, regval
| adapter
->eims_enable_mask
);
1381 regval
= rd32(E1000_EIAM
);
1382 wr32(E1000_EIAM
, regval
| adapter
->eims_enable_mask
);
1383 wr32(E1000_EIMS
, adapter
->eims_enable_mask
);
1384 if (adapter
->vfs_allocated_count
) {
1385 wr32(E1000_MBVFIMR
, 0xFF);
1386 ims
|= E1000_IMS_VMMB
;
1388 wr32(E1000_IMS
, ims
);
1390 wr32(E1000_IMS
, IMS_ENABLE_MASK
|
1392 wr32(E1000_IAM
, IMS_ENABLE_MASK
|
1397 static void igb_update_mng_vlan(struct igb_adapter
*adapter
)
1399 struct e1000_hw
*hw
= &adapter
->hw
;
1400 u16 vid
= adapter
->hw
.mng_cookie
.vlan_id
;
1401 u16 old_vid
= adapter
->mng_vlan_id
;
1403 if (hw
->mng_cookie
.status
& E1000_MNG_DHCP_COOKIE_STATUS_VLAN
) {
1404 /* add VID to filter table */
1405 igb_vfta_set(hw
, vid
, true);
1406 adapter
->mng_vlan_id
= vid
;
1408 adapter
->mng_vlan_id
= IGB_MNG_VLAN_NONE
;
1411 if ((old_vid
!= (u16
)IGB_MNG_VLAN_NONE
) &&
1413 !test_bit(old_vid
, adapter
->active_vlans
)) {
1414 /* remove VID from filter table */
1415 igb_vfta_set(hw
, old_vid
, false);
1420 * igb_release_hw_control - release control of the h/w to f/w
1421 * @adapter: address of board private structure
1423 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1424 * For ASF and Pass Through versions of f/w this means that the
1425 * driver is no longer loaded.
1428 static void igb_release_hw_control(struct igb_adapter
*adapter
)
1430 struct e1000_hw
*hw
= &adapter
->hw
;
1433 /* Let firmware take over control of h/w */
1434 ctrl_ext
= rd32(E1000_CTRL_EXT
);
1435 wr32(E1000_CTRL_EXT
,
1436 ctrl_ext
& ~E1000_CTRL_EXT_DRV_LOAD
);
1440 * igb_get_hw_control - get control of the h/w from f/w
1441 * @adapter: address of board private structure
1443 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1444 * For ASF and Pass Through versions of f/w this means that
1445 * the driver is loaded.
1448 static void igb_get_hw_control(struct igb_adapter
*adapter
)
1450 struct e1000_hw
*hw
= &adapter
->hw
;
1453 /* Let firmware know the driver has taken over */
1454 ctrl_ext
= rd32(E1000_CTRL_EXT
);
1455 wr32(E1000_CTRL_EXT
,
1456 ctrl_ext
| E1000_CTRL_EXT_DRV_LOAD
);
1460 * igb_configure - configure the hardware for RX and TX
1461 * @adapter: private board structure
1463 static void igb_configure(struct igb_adapter
*adapter
)
1465 struct net_device
*netdev
= adapter
->netdev
;
1468 igb_get_hw_control(adapter
);
1469 igb_set_rx_mode(netdev
);
1471 igb_restore_vlan(adapter
);
1473 igb_setup_tctl(adapter
);
1474 igb_setup_mrqc(adapter
);
1475 igb_setup_rctl(adapter
);
1477 igb_configure_tx(adapter
);
1478 igb_configure_rx(adapter
);
1480 igb_rx_fifo_flush_82575(&adapter
->hw
);
1482 /* call igb_desc_unused which always leaves
1483 * at least 1 descriptor unused to make sure
1484 * next_to_use != next_to_clean */
1485 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
1486 struct igb_ring
*ring
= adapter
->rx_ring
[i
];
1487 igb_alloc_rx_buffers(ring
, igb_desc_unused(ring
));
1492 * igb_power_up_link - Power up the phy/serdes link
1493 * @adapter: address of board private structure
1495 void igb_power_up_link(struct igb_adapter
*adapter
)
1497 if (adapter
->hw
.phy
.media_type
== e1000_media_type_copper
)
1498 igb_power_up_phy_copper(&adapter
->hw
);
1500 igb_power_up_serdes_link_82575(&adapter
->hw
);
1504 * igb_power_down_link - Power down the phy/serdes link
1505 * @adapter: address of board private structure
1507 static void igb_power_down_link(struct igb_adapter
*adapter
)
1509 if (adapter
->hw
.phy
.media_type
== e1000_media_type_copper
)
1510 igb_power_down_phy_copper_82575(&adapter
->hw
);
1512 igb_shutdown_serdes_link_82575(&adapter
->hw
);
1516 * igb_up - Open the interface and prepare it to handle traffic
1517 * @adapter: board private structure
1519 int igb_up(struct igb_adapter
*adapter
)
1521 struct e1000_hw
*hw
= &adapter
->hw
;
1524 /* hardware has been reset, we need to reload some things */
1525 igb_configure(adapter
);
1527 clear_bit(__IGB_DOWN
, &adapter
->state
);
1529 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1530 napi_enable(&(adapter
->q_vector
[i
]->napi
));
1532 if (adapter
->msix_entries
)
1533 igb_configure_msix(adapter
);
1535 igb_assign_vector(adapter
->q_vector
[0], 0);
1537 /* Clear any pending interrupts. */
1539 igb_irq_enable(adapter
);
1541 /* notify VFs that reset has been completed */
1542 if (adapter
->vfs_allocated_count
) {
1543 u32 reg_data
= rd32(E1000_CTRL_EXT
);
1544 reg_data
|= E1000_CTRL_EXT_PFRSTD
;
1545 wr32(E1000_CTRL_EXT
, reg_data
);
1548 netif_tx_start_all_queues(adapter
->netdev
);
1550 /* start the watchdog. */
1551 hw
->mac
.get_link_status
= 1;
1552 schedule_work(&adapter
->watchdog_task
);
1557 void igb_down(struct igb_adapter
*adapter
)
1559 struct net_device
*netdev
= adapter
->netdev
;
1560 struct e1000_hw
*hw
= &adapter
->hw
;
1564 /* signal that we're down so the interrupt handler does not
1565 * reschedule our watchdog timer */
1566 set_bit(__IGB_DOWN
, &adapter
->state
);
1568 /* disable receives in the hardware */
1569 rctl
= rd32(E1000_RCTL
);
1570 wr32(E1000_RCTL
, rctl
& ~E1000_RCTL_EN
);
1571 /* flush and sleep below */
1573 netif_tx_stop_all_queues(netdev
);
1575 /* disable transmits in the hardware */
1576 tctl
= rd32(E1000_TCTL
);
1577 tctl
&= ~E1000_TCTL_EN
;
1578 wr32(E1000_TCTL
, tctl
);
1579 /* flush both disables and wait for them to finish */
1583 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
1584 napi_disable(&(adapter
->q_vector
[i
]->napi
));
1586 igb_irq_disable(adapter
);
1588 del_timer_sync(&adapter
->watchdog_timer
);
1589 del_timer_sync(&adapter
->phy_info_timer
);
1591 netif_carrier_off(netdev
);
1593 /* record the stats before reset*/
1594 spin_lock(&adapter
->stats64_lock
);
1595 igb_update_stats(adapter
, &adapter
->stats64
);
1596 spin_unlock(&adapter
->stats64_lock
);
1598 adapter
->link_speed
= 0;
1599 adapter
->link_duplex
= 0;
1601 if (!pci_channel_offline(adapter
->pdev
))
1603 igb_clean_all_tx_rings(adapter
);
1604 igb_clean_all_rx_rings(adapter
);
1605 #ifdef CONFIG_IGB_DCA
1607 /* since we reset the hardware DCA settings were cleared */
1608 igb_setup_dca(adapter
);
1612 void igb_reinit_locked(struct igb_adapter
*adapter
)
1614 WARN_ON(in_interrupt());
1615 while (test_and_set_bit(__IGB_RESETTING
, &adapter
->state
))
1619 clear_bit(__IGB_RESETTING
, &adapter
->state
);
1622 void igb_reset(struct igb_adapter
*adapter
)
1624 struct pci_dev
*pdev
= adapter
->pdev
;
1625 struct e1000_hw
*hw
= &adapter
->hw
;
1626 struct e1000_mac_info
*mac
= &hw
->mac
;
1627 struct e1000_fc_info
*fc
= &hw
->fc
;
1628 u32 pba
= 0, tx_space
, min_tx_space
, min_rx_space
;
1631 /* Repartition Pba for greater than 9k mtu
1632 * To take effect CTRL.RST is required.
1634 switch (mac
->type
) {
1637 pba
= rd32(E1000_RXPBS
);
1638 pba
= igb_rxpbs_adjust_82580(pba
);
1641 pba
= rd32(E1000_RXPBS
);
1642 pba
&= E1000_RXPBS_SIZE_MASK_82576
;
1646 pba
= E1000_PBA_34K
;
1650 if ((adapter
->max_frame_size
> ETH_FRAME_LEN
+ ETH_FCS_LEN
) &&
1651 (mac
->type
< e1000_82576
)) {
1652 /* adjust PBA for jumbo frames */
1653 wr32(E1000_PBA
, pba
);
1655 /* To maintain wire speed transmits, the Tx FIFO should be
1656 * large enough to accommodate two full transmit packets,
1657 * rounded up to the next 1KB and expressed in KB. Likewise,
1658 * the Rx FIFO should be large enough to accommodate at least
1659 * one full receive packet and is similarly rounded up and
1660 * expressed in KB. */
1661 pba
= rd32(E1000_PBA
);
1662 /* upper 16 bits has Tx packet buffer allocation size in KB */
1663 tx_space
= pba
>> 16;
1664 /* lower 16 bits has Rx packet buffer allocation size in KB */
1666 /* the tx fifo also stores 16 bytes of information about the tx
1667 * but don't include ethernet FCS because hardware appends it */
1668 min_tx_space
= (adapter
->max_frame_size
+
1669 sizeof(union e1000_adv_tx_desc
) -
1671 min_tx_space
= ALIGN(min_tx_space
, 1024);
1672 min_tx_space
>>= 10;
1673 /* software strips receive CRC, so leave room for it */
1674 min_rx_space
= adapter
->max_frame_size
;
1675 min_rx_space
= ALIGN(min_rx_space
, 1024);
1676 min_rx_space
>>= 10;
1678 /* If current Tx allocation is less than the min Tx FIFO size,
1679 * and the min Tx FIFO size is less than the current Rx FIFO
1680 * allocation, take space away from current Rx allocation */
1681 if (tx_space
< min_tx_space
&&
1682 ((min_tx_space
- tx_space
) < pba
)) {
1683 pba
= pba
- (min_tx_space
- tx_space
);
1685 /* if short on rx space, rx wins and must trump tx
1687 if (pba
< min_rx_space
)
1690 wr32(E1000_PBA
, pba
);
1693 /* flow control settings */
1694 /* The high water mark must be low enough to fit one full frame
1695 * (or the size used for early receive) above it in the Rx FIFO.
1696 * Set it to the lower of:
1697 * - 90% of the Rx FIFO size, or
1698 * - the full Rx FIFO size minus one full frame */
1699 hwm
= min(((pba
<< 10) * 9 / 10),
1700 ((pba
<< 10) - 2 * adapter
->max_frame_size
));
1702 fc
->high_water
= hwm
& 0xFFF0; /* 16-byte granularity */
1703 fc
->low_water
= fc
->high_water
- 16;
1704 fc
->pause_time
= 0xFFFF;
1706 fc
->current_mode
= fc
->requested_mode
;
1708 /* disable receive for all VFs and wait one second */
1709 if (adapter
->vfs_allocated_count
) {
1711 for (i
= 0 ; i
< adapter
->vfs_allocated_count
; i
++)
1712 adapter
->vf_data
[i
].flags
&= IGB_VF_FLAG_PF_SET_MAC
;
1714 /* ping all the active vfs to let them know we are going down */
1715 igb_ping_all_vfs(adapter
);
1717 /* disable transmits and receives */
1718 wr32(E1000_VFRE
, 0);
1719 wr32(E1000_VFTE
, 0);
1722 /* Allow time for pending master requests to run */
1723 hw
->mac
.ops
.reset_hw(hw
);
1726 if (hw
->mac
.ops
.init_hw(hw
))
1727 dev_err(&pdev
->dev
, "Hardware Error\n");
1729 igb_init_dmac(adapter
, pba
);
1730 if (!netif_running(adapter
->netdev
))
1731 igb_power_down_link(adapter
);
1733 igb_update_mng_vlan(adapter
);
1735 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1736 wr32(E1000_VET
, ETHERNET_IEEE_VLAN_TYPE
);
1738 igb_get_phy_info(hw
);
1741 static netdev_features_t
igb_fix_features(struct net_device
*netdev
,
1742 netdev_features_t features
)
1745 * Since there is no support for separate rx/tx vlan accel
1746 * enable/disable make sure tx flag is always in same state as rx.
1748 if (features
& NETIF_F_HW_VLAN_RX
)
1749 features
|= NETIF_F_HW_VLAN_TX
;
1751 features
&= ~NETIF_F_HW_VLAN_TX
;
1756 static int igb_set_features(struct net_device
*netdev
,
1757 netdev_features_t features
)
1759 netdev_features_t changed
= netdev
->features
^ features
;
1761 if (changed
& NETIF_F_HW_VLAN_RX
)
1762 igb_vlan_mode(netdev
, features
);
1767 static const struct net_device_ops igb_netdev_ops
= {
1768 .ndo_open
= igb_open
,
1769 .ndo_stop
= igb_close
,
1770 .ndo_start_xmit
= igb_xmit_frame
,
1771 .ndo_get_stats64
= igb_get_stats64
,
1772 .ndo_set_rx_mode
= igb_set_rx_mode
,
1773 .ndo_set_mac_address
= igb_set_mac
,
1774 .ndo_change_mtu
= igb_change_mtu
,
1775 .ndo_do_ioctl
= igb_ioctl
,
1776 .ndo_tx_timeout
= igb_tx_timeout
,
1777 .ndo_validate_addr
= eth_validate_addr
,
1778 .ndo_vlan_rx_add_vid
= igb_vlan_rx_add_vid
,
1779 .ndo_vlan_rx_kill_vid
= igb_vlan_rx_kill_vid
,
1780 .ndo_set_vf_mac
= igb_ndo_set_vf_mac
,
1781 .ndo_set_vf_vlan
= igb_ndo_set_vf_vlan
,
1782 .ndo_set_vf_tx_rate
= igb_ndo_set_vf_bw
,
1783 .ndo_get_vf_config
= igb_ndo_get_vf_config
,
1784 #ifdef CONFIG_NET_POLL_CONTROLLER
1785 .ndo_poll_controller
= igb_netpoll
,
1787 .ndo_fix_features
= igb_fix_features
,
1788 .ndo_set_features
= igb_set_features
,
1792 * igb_probe - Device Initialization Routine
1793 * @pdev: PCI device information struct
1794 * @ent: entry in igb_pci_tbl
1796 * Returns 0 on success, negative on failure
1798 * igb_probe initializes an adapter identified by a pci_dev structure.
1799 * The OS initialization, configuring of the adapter private structure,
1800 * and a hardware reset occur.
1802 static int __devinit
igb_probe(struct pci_dev
*pdev
,
1803 const struct pci_device_id
*ent
)
1805 struct net_device
*netdev
;
1806 struct igb_adapter
*adapter
;
1807 struct e1000_hw
*hw
;
1808 u16 eeprom_data
= 0;
1810 static int global_quad_port_a
; /* global quad port a indication */
1811 const struct e1000_info
*ei
= igb_info_tbl
[ent
->driver_data
];
1812 unsigned long mmio_start
, mmio_len
;
1813 int err
, pci_using_dac
;
1814 u16 eeprom_apme_mask
= IGB_EEPROM_APME
;
1815 u8 part_str
[E1000_PBANUM_LENGTH
];
1817 /* Catch broken hardware that put the wrong VF device ID in
1818 * the PCIe SR-IOV capability.
1820 if (pdev
->is_virtfn
) {
1821 WARN(1, KERN_ERR
"%s (%hx:%hx) should not be a VF!\n",
1822 pci_name(pdev
), pdev
->vendor
, pdev
->device
);
1826 err
= pci_enable_device_mem(pdev
);
1831 err
= dma_set_mask(&pdev
->dev
, DMA_BIT_MASK(64));
1833 err
= dma_set_coherent_mask(&pdev
->dev
, DMA_BIT_MASK(64));
1837 err
= dma_set_mask(&pdev
->dev
, DMA_BIT_MASK(32));
1839 err
= dma_set_coherent_mask(&pdev
->dev
, DMA_BIT_MASK(32));
1841 dev_err(&pdev
->dev
, "No usable DMA "
1842 "configuration, aborting\n");
1848 err
= pci_request_selected_regions(pdev
, pci_select_bars(pdev
,
1854 pci_enable_pcie_error_reporting(pdev
);
1856 pci_set_master(pdev
);
1857 pci_save_state(pdev
);
1860 netdev
= alloc_etherdev_mq(sizeof(struct igb_adapter
),
1863 goto err_alloc_etherdev
;
1865 SET_NETDEV_DEV(netdev
, &pdev
->dev
);
1867 pci_set_drvdata(pdev
, netdev
);
1868 adapter
= netdev_priv(netdev
);
1869 adapter
->netdev
= netdev
;
1870 adapter
->pdev
= pdev
;
1873 adapter
->msg_enable
= NETIF_MSG_DRV
| NETIF_MSG_PROBE
;
1875 mmio_start
= pci_resource_start(pdev
, 0);
1876 mmio_len
= pci_resource_len(pdev
, 0);
1879 hw
->hw_addr
= ioremap(mmio_start
, mmio_len
);
1883 netdev
->netdev_ops
= &igb_netdev_ops
;
1884 igb_set_ethtool_ops(netdev
);
1885 netdev
->watchdog_timeo
= 5 * HZ
;
1887 strncpy(netdev
->name
, pci_name(pdev
), sizeof(netdev
->name
) - 1);
1889 netdev
->mem_start
= mmio_start
;
1890 netdev
->mem_end
= mmio_start
+ mmio_len
;
1892 /* PCI config space info */
1893 hw
->vendor_id
= pdev
->vendor
;
1894 hw
->device_id
= pdev
->device
;
1895 hw
->revision_id
= pdev
->revision
;
1896 hw
->subsystem_vendor_id
= pdev
->subsystem_vendor
;
1897 hw
->subsystem_device_id
= pdev
->subsystem_device
;
1899 /* Copy the default MAC, PHY and NVM function pointers */
1900 memcpy(&hw
->mac
.ops
, ei
->mac_ops
, sizeof(hw
->mac
.ops
));
1901 memcpy(&hw
->phy
.ops
, ei
->phy_ops
, sizeof(hw
->phy
.ops
));
1902 memcpy(&hw
->nvm
.ops
, ei
->nvm_ops
, sizeof(hw
->nvm
.ops
));
1903 /* Initialize skew-specific constants */
1904 err
= ei
->get_invariants(hw
);
1908 /* setup the private structure */
1909 err
= igb_sw_init(adapter
);
1913 igb_get_bus_info_pcie(hw
);
1915 hw
->phy
.autoneg_wait_to_complete
= false;
1917 /* Copper options */
1918 if (hw
->phy
.media_type
== e1000_media_type_copper
) {
1919 hw
->phy
.mdix
= AUTO_ALL_MODES
;
1920 hw
->phy
.disable_polarity_correction
= false;
1921 hw
->phy
.ms_type
= e1000_ms_hw_default
;
1924 if (igb_check_reset_block(hw
))
1925 dev_info(&pdev
->dev
,
1926 "PHY reset is blocked due to SOL/IDER session.\n");
1929 * features is initialized to 0 in allocation, it might have bits
1930 * set by igb_sw_init so we should use an or instead of an
1933 netdev
->features
|= NETIF_F_SG
|
1940 NETIF_F_HW_VLAN_RX
|
1943 /* copy netdev features into list of user selectable features */
1944 netdev
->hw_features
|= netdev
->features
;
1946 /* set this bit last since it cannot be part of hw_features */
1947 netdev
->features
|= NETIF_F_HW_VLAN_FILTER
;
1949 netdev
->vlan_features
|= NETIF_F_TSO
|
1955 if (pci_using_dac
) {
1956 netdev
->features
|= NETIF_F_HIGHDMA
;
1957 netdev
->vlan_features
|= NETIF_F_HIGHDMA
;
1960 if (hw
->mac
.type
>= e1000_82576
) {
1961 netdev
->hw_features
|= NETIF_F_SCTP_CSUM
;
1962 netdev
->features
|= NETIF_F_SCTP_CSUM
;
1965 netdev
->priv_flags
|= IFF_UNICAST_FLT
;
1967 adapter
->en_mng_pt
= igb_enable_mng_pass_thru(hw
);
1969 /* before reading the NVM, reset the controller to put the device in a
1970 * known good starting state */
1971 hw
->mac
.ops
.reset_hw(hw
);
1973 /* make sure the NVM is good */
1974 if (hw
->nvm
.ops
.validate(hw
) < 0) {
1975 dev_err(&pdev
->dev
, "The NVM Checksum Is Not Valid\n");
1980 /* copy the MAC address out of the NVM */
1981 if (hw
->mac
.ops
.read_mac_addr(hw
))
1982 dev_err(&pdev
->dev
, "NVM Read Error\n");
1984 memcpy(netdev
->dev_addr
, hw
->mac
.addr
, netdev
->addr_len
);
1985 memcpy(netdev
->perm_addr
, hw
->mac
.addr
, netdev
->addr_len
);
1987 if (!is_valid_ether_addr(netdev
->perm_addr
)) {
1988 dev_err(&pdev
->dev
, "Invalid MAC Address\n");
1993 setup_timer(&adapter
->watchdog_timer
, igb_watchdog
,
1994 (unsigned long) adapter
);
1995 setup_timer(&adapter
->phy_info_timer
, igb_update_phy_info
,
1996 (unsigned long) adapter
);
1998 INIT_WORK(&adapter
->reset_task
, igb_reset_task
);
1999 INIT_WORK(&adapter
->watchdog_task
, igb_watchdog_task
);
2001 /* Initialize link properties that are user-changeable */
2002 adapter
->fc_autoneg
= true;
2003 hw
->mac
.autoneg
= true;
2004 hw
->phy
.autoneg_advertised
= 0x2f;
2006 hw
->fc
.requested_mode
= e1000_fc_default
;
2007 hw
->fc
.current_mode
= e1000_fc_default
;
2009 igb_validate_mdi_setting(hw
);
2011 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2012 * enable the ACPI Magic Packet filter
2015 if (hw
->bus
.func
== 0)
2016 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_A
, 1, &eeprom_data
);
2017 else if (hw
->mac
.type
>= e1000_82580
)
2018 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_A
+
2019 NVM_82580_LAN_FUNC_OFFSET(hw
->bus
.func
), 1,
2021 else if (hw
->bus
.func
== 1)
2022 hw
->nvm
.ops
.read(hw
, NVM_INIT_CONTROL3_PORT_B
, 1, &eeprom_data
);
2024 if (eeprom_data
& eeprom_apme_mask
)
2025 adapter
->eeprom_wol
|= E1000_WUFC_MAG
;
2027 /* now that we have the eeprom settings, apply the special cases where
2028 * the eeprom may be wrong or the board simply won't support wake on
2029 * lan on a particular port */
2030 switch (pdev
->device
) {
2031 case E1000_DEV_ID_82575GB_QUAD_COPPER
:
2032 adapter
->eeprom_wol
= 0;
2034 case E1000_DEV_ID_82575EB_FIBER_SERDES
:
2035 case E1000_DEV_ID_82576_FIBER
:
2036 case E1000_DEV_ID_82576_SERDES
:
2037 /* Wake events only supported on port A for dual fiber
2038 * regardless of eeprom setting */
2039 if (rd32(E1000_STATUS
) & E1000_STATUS_FUNC_1
)
2040 adapter
->eeprom_wol
= 0;
2042 case E1000_DEV_ID_82576_QUAD_COPPER
:
2043 case E1000_DEV_ID_82576_QUAD_COPPER_ET2
:
2044 /* if quad port adapter, disable WoL on all but port A */
2045 if (global_quad_port_a
!= 0)
2046 adapter
->eeprom_wol
= 0;
2048 adapter
->flags
|= IGB_FLAG_QUAD_PORT_A
;
2049 /* Reset for multiple quad port adapters */
2050 if (++global_quad_port_a
== 4)
2051 global_quad_port_a
= 0;
2055 /* initialize the wol settings based on the eeprom settings */
2056 adapter
->wol
= adapter
->eeprom_wol
;
2057 device_set_wakeup_enable(&adapter
->pdev
->dev
, adapter
->wol
);
2059 /* reset the hardware with the new settings */
2062 /* let the f/w know that the h/w is now under the control of the
2064 igb_get_hw_control(adapter
);
2066 strcpy(netdev
->name
, "eth%d");
2067 err
= register_netdev(netdev
);
2071 /* carrier off reporting is important to ethtool even BEFORE open */
2072 netif_carrier_off(netdev
);
2074 #ifdef CONFIG_IGB_DCA
2075 if (dca_add_requester(&pdev
->dev
) == 0) {
2076 adapter
->flags
|= IGB_FLAG_DCA_ENABLED
;
2077 dev_info(&pdev
->dev
, "DCA enabled\n");
2078 igb_setup_dca(adapter
);
2082 /* do hw tstamp init after resetting */
2083 igb_init_hw_timer(adapter
);
2085 dev_info(&pdev
->dev
, "Intel(R) Gigabit Ethernet Network Connection\n");
2086 /* print bus type/speed/width info */
2087 dev_info(&pdev
->dev
, "%s: (PCIe:%s:%s) %pM\n",
2089 ((hw
->bus
.speed
== e1000_bus_speed_2500
) ? "2.5Gb/s" :
2090 (hw
->bus
.speed
== e1000_bus_speed_5000
) ? "5.0Gb/s" :
2092 ((hw
->bus
.width
== e1000_bus_width_pcie_x4
) ? "Width x4" :
2093 (hw
->bus
.width
== e1000_bus_width_pcie_x2
) ? "Width x2" :
2094 (hw
->bus
.width
== e1000_bus_width_pcie_x1
) ? "Width x1" :
2098 ret_val
= igb_read_part_string(hw
, part_str
, E1000_PBANUM_LENGTH
);
2100 strcpy(part_str
, "Unknown");
2101 dev_info(&pdev
->dev
, "%s: PBA No: %s\n", netdev
->name
, part_str
);
2102 dev_info(&pdev
->dev
,
2103 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2104 adapter
->msix_entries
? "MSI-X" :
2105 (adapter
->flags
& IGB_FLAG_HAS_MSI
) ? "MSI" : "legacy",
2106 adapter
->num_rx_queues
, adapter
->num_tx_queues
);
2107 switch (hw
->mac
.type
) {
2109 igb_set_eee_i350(hw
);
2117 igb_release_hw_control(adapter
);
2119 if (!igb_check_reset_block(hw
))
2122 if (hw
->flash_address
)
2123 iounmap(hw
->flash_address
);
2125 igb_clear_interrupt_scheme(adapter
);
2126 iounmap(hw
->hw_addr
);
2128 free_netdev(netdev
);
2130 pci_release_selected_regions(pdev
,
2131 pci_select_bars(pdev
, IORESOURCE_MEM
));
2134 pci_disable_device(pdev
);
2139 * igb_remove - Device Removal Routine
2140 * @pdev: PCI device information struct
2142 * igb_remove is called by the PCI subsystem to alert the driver
2143 * that it should release a PCI device. The could be caused by a
2144 * Hot-Plug event, or because the driver is going to be removed from
2147 static void __devexit
igb_remove(struct pci_dev
*pdev
)
2149 struct net_device
*netdev
= pci_get_drvdata(pdev
);
2150 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2151 struct e1000_hw
*hw
= &adapter
->hw
;
2154 * The watchdog timer may be rescheduled, so explicitly
2155 * disable watchdog from being rescheduled.
2157 set_bit(__IGB_DOWN
, &adapter
->state
);
2158 del_timer_sync(&adapter
->watchdog_timer
);
2159 del_timer_sync(&adapter
->phy_info_timer
);
2161 cancel_work_sync(&adapter
->reset_task
);
2162 cancel_work_sync(&adapter
->watchdog_task
);
2164 #ifdef CONFIG_IGB_DCA
2165 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
) {
2166 dev_info(&pdev
->dev
, "DCA disabled\n");
2167 dca_remove_requester(&pdev
->dev
);
2168 adapter
->flags
&= ~IGB_FLAG_DCA_ENABLED
;
2169 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_DISABLE
);
2173 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2174 * would have already happened in close and is redundant. */
2175 igb_release_hw_control(adapter
);
2177 unregister_netdev(netdev
);
2179 igb_clear_interrupt_scheme(adapter
);
2181 #ifdef CONFIG_PCI_IOV
2182 /* reclaim resources allocated to VFs */
2183 if (adapter
->vf_data
) {
2184 /* disable iov and allow time for transactions to clear */
2185 if (!igb_check_vf_assignment(adapter
)) {
2186 pci_disable_sriov(pdev
);
2189 dev_info(&pdev
->dev
, "VF(s) assigned to guests!\n");
2192 kfree(adapter
->vf_data
);
2193 adapter
->vf_data
= NULL
;
2194 wr32(E1000_IOVCTL
, E1000_IOVCTL_REUSE_VFQ
);
2197 dev_info(&pdev
->dev
, "IOV Disabled\n");
2201 iounmap(hw
->hw_addr
);
2202 if (hw
->flash_address
)
2203 iounmap(hw
->flash_address
);
2204 pci_release_selected_regions(pdev
,
2205 pci_select_bars(pdev
, IORESOURCE_MEM
));
2207 kfree(adapter
->shadow_vfta
);
2208 free_netdev(netdev
);
2210 pci_disable_pcie_error_reporting(pdev
);
2212 pci_disable_device(pdev
);
2216 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2217 * @adapter: board private structure to initialize
2219 * This function initializes the vf specific data storage and then attempts to
2220 * allocate the VFs. The reason for ordering it this way is because it is much
2221 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2222 * the memory for the VFs.
2224 static void __devinit
igb_probe_vfs(struct igb_adapter
* adapter
)
2226 #ifdef CONFIG_PCI_IOV
2227 struct pci_dev
*pdev
= adapter
->pdev
;
2228 int old_vfs
= igb_find_enabled_vfs(adapter
);
2232 dev_info(&pdev
->dev
, "%d pre-allocated VFs found - override "
2233 "max_vfs setting of %d\n", old_vfs
, max_vfs
);
2234 adapter
->vfs_allocated_count
= old_vfs
;
2237 if (!adapter
->vfs_allocated_count
)
2240 adapter
->vf_data
= kcalloc(adapter
->vfs_allocated_count
,
2241 sizeof(struct vf_data_storage
), GFP_KERNEL
);
2242 /* if allocation failed then we do not support SR-IOV */
2243 if (!adapter
->vf_data
) {
2244 adapter
->vfs_allocated_count
= 0;
2245 dev_err(&pdev
->dev
, "Unable to allocate memory for VF "
2251 if (pci_enable_sriov(pdev
, adapter
->vfs_allocated_count
))
2254 dev_info(&pdev
->dev
, "%d VFs allocated\n",
2255 adapter
->vfs_allocated_count
);
2256 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++)
2257 igb_vf_configure(adapter
, i
);
2259 /* DMA Coalescing is not supported in IOV mode. */
2260 adapter
->flags
&= ~IGB_FLAG_DMAC
;
2263 kfree(adapter
->vf_data
);
2264 adapter
->vf_data
= NULL
;
2265 adapter
->vfs_allocated_count
= 0;
2268 #endif /* CONFIG_PCI_IOV */
2272 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2273 * @adapter: board private structure to initialize
2275 * igb_init_hw_timer initializes the function pointer and values for the hw
2276 * timer found in hardware.
2278 static void igb_init_hw_timer(struct igb_adapter
*adapter
)
2280 struct e1000_hw
*hw
= &adapter
->hw
;
2282 switch (hw
->mac
.type
) {
2285 memset(&adapter
->cycles
, 0, sizeof(adapter
->cycles
));
2286 adapter
->cycles
.read
= igb_read_clock
;
2287 adapter
->cycles
.mask
= CLOCKSOURCE_MASK(64);
2288 adapter
->cycles
.mult
= 1;
2290 * The 82580 timesync updates the system timer every 8ns by 8ns
2291 * and the value cannot be shifted. Instead we need to shift
2292 * the registers to generate a 64bit timer value. As a result
2293 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2294 * 24 in order to generate a larger value for synchronization.
2296 adapter
->cycles
.shift
= IGB_82580_TSYNC_SHIFT
;
2297 /* disable system timer temporarily by setting bit 31 */
2298 wr32(E1000_TSAUXC
, 0x80000000);
2301 /* Set registers so that rollover occurs soon to test this. */
2302 wr32(E1000_SYSTIMR
, 0x00000000);
2303 wr32(E1000_SYSTIML
, 0x80000000);
2304 wr32(E1000_SYSTIMH
, 0x000000FF);
2307 /* enable system timer by clearing bit 31 */
2308 wr32(E1000_TSAUXC
, 0x0);
2311 timecounter_init(&adapter
->clock
,
2313 ktime_to_ns(ktime_get_real()));
2315 * Synchronize our NIC clock against system wall clock. NIC
2316 * time stamp reading requires ~3us per sample, each sample
2317 * was pretty stable even under load => only require 10
2318 * samples for each offset comparison.
2320 memset(&adapter
->compare
, 0, sizeof(adapter
->compare
));
2321 adapter
->compare
.source
= &adapter
->clock
;
2322 adapter
->compare
.target
= ktime_get_real
;
2323 adapter
->compare
.num_samples
= 10;
2324 timecompare_update(&adapter
->compare
, 0);
2328 * Initialize hardware timer: we keep it running just in case
2329 * that some program needs it later on.
2331 memset(&adapter
->cycles
, 0, sizeof(adapter
->cycles
));
2332 adapter
->cycles
.read
= igb_read_clock
;
2333 adapter
->cycles
.mask
= CLOCKSOURCE_MASK(64);
2334 adapter
->cycles
.mult
= 1;
2336 * Scale the NIC clock cycle by a large factor so that
2337 * relatively small clock corrections can be added or
2338 * subtracted at each clock tick. The drawbacks of a large
2339 * factor are a) that the clock register overflows more quickly
2340 * (not such a big deal) and b) that the increment per tick has
2341 * to fit into 24 bits. As a result we need to use a shift of
2342 * 19 so we can fit a value of 16 into the TIMINCA register.
2344 adapter
->cycles
.shift
= IGB_82576_TSYNC_SHIFT
;
2346 (1 << E1000_TIMINCA_16NS_SHIFT
) |
2347 (16 << IGB_82576_TSYNC_SHIFT
));
2349 /* Set registers so that rollover occurs soon to test this. */
2350 wr32(E1000_SYSTIML
, 0x00000000);
2351 wr32(E1000_SYSTIMH
, 0xFF800000);
2354 timecounter_init(&adapter
->clock
,
2356 ktime_to_ns(ktime_get_real()));
2358 * Synchronize our NIC clock against system wall clock. NIC
2359 * time stamp reading requires ~3us per sample, each sample
2360 * was pretty stable even under load => only require 10
2361 * samples for each offset comparison.
2363 memset(&adapter
->compare
, 0, sizeof(adapter
->compare
));
2364 adapter
->compare
.source
= &adapter
->clock
;
2365 adapter
->compare
.target
= ktime_get_real
;
2366 adapter
->compare
.num_samples
= 10;
2367 timecompare_update(&adapter
->compare
, 0);
2370 /* 82575 does not support timesync */
2378 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2379 * @adapter: board private structure to initialize
2381 * igb_sw_init initializes the Adapter private data structure.
2382 * Fields are initialized based on PCI device information and
2383 * OS network device settings (MTU size).
2385 static int __devinit
igb_sw_init(struct igb_adapter
*adapter
)
2387 struct e1000_hw
*hw
= &adapter
->hw
;
2388 struct net_device
*netdev
= adapter
->netdev
;
2389 struct pci_dev
*pdev
= adapter
->pdev
;
2391 pci_read_config_word(pdev
, PCI_COMMAND
, &hw
->bus
.pci_cmd_word
);
2393 /* set default ring sizes */
2394 adapter
->tx_ring_count
= IGB_DEFAULT_TXD
;
2395 adapter
->rx_ring_count
= IGB_DEFAULT_RXD
;
2397 /* set default ITR values */
2398 adapter
->rx_itr_setting
= IGB_DEFAULT_ITR
;
2399 adapter
->tx_itr_setting
= IGB_DEFAULT_ITR
;
2401 /* set default work limits */
2402 adapter
->tx_work_limit
= IGB_DEFAULT_TX_WORK
;
2404 adapter
->max_frame_size
= netdev
->mtu
+ ETH_HLEN
+ ETH_FCS_LEN
+
2406 adapter
->min_frame_size
= ETH_ZLEN
+ ETH_FCS_LEN
;
2410 spin_lock_init(&adapter
->stats64_lock
);
2411 #ifdef CONFIG_PCI_IOV
2412 switch (hw
->mac
.type
) {
2416 dev_warn(&pdev
->dev
,
2417 "Maximum of 7 VFs per PF, using max\n");
2418 adapter
->vfs_allocated_count
= 7;
2420 adapter
->vfs_allocated_count
= max_vfs
;
2425 #endif /* CONFIG_PCI_IOV */
2426 adapter
->rss_queues
= min_t(u32
, IGB_MAX_RX_QUEUES
, num_online_cpus());
2427 /* i350 cannot do RSS and SR-IOV at the same time */
2428 if (hw
->mac
.type
== e1000_i350
&& adapter
->vfs_allocated_count
)
2429 adapter
->rss_queues
= 1;
2432 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2433 * then we should combine the queues into a queue pair in order to
2434 * conserve interrupts due to limited supply
2436 if ((adapter
->rss_queues
> 4) ||
2437 ((adapter
->rss_queues
> 1) && (adapter
->vfs_allocated_count
> 6)))
2438 adapter
->flags
|= IGB_FLAG_QUEUE_PAIRS
;
2440 /* Setup and initialize a copy of the hw vlan table array */
2441 adapter
->shadow_vfta
= kzalloc(sizeof(u32
) *
2442 E1000_VLAN_FILTER_TBL_SIZE
,
2445 /* This call may decrease the number of queues */
2446 if (igb_init_interrupt_scheme(adapter
)) {
2447 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
2451 igb_probe_vfs(adapter
);
2453 /* Explicitly disable IRQ since the NIC can be in any state. */
2454 igb_irq_disable(adapter
);
2456 if (hw
->mac
.type
== e1000_i350
)
2457 adapter
->flags
&= ~IGB_FLAG_DMAC
;
2459 set_bit(__IGB_DOWN
, &adapter
->state
);
2464 * igb_open - Called when a network interface is made active
2465 * @netdev: network interface device structure
2467 * Returns 0 on success, negative value on failure
2469 * The open entry point is called when a network interface is made
2470 * active by the system (IFF_UP). At this point all resources needed
2471 * for transmit and receive operations are allocated, the interrupt
2472 * handler is registered with the OS, the watchdog timer is started,
2473 * and the stack is notified that the interface is ready.
2475 static int igb_open(struct net_device
*netdev
)
2477 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2478 struct e1000_hw
*hw
= &adapter
->hw
;
2482 /* disallow open during test */
2483 if (test_bit(__IGB_TESTING
, &adapter
->state
))
2486 netif_carrier_off(netdev
);
2488 /* allocate transmit descriptors */
2489 err
= igb_setup_all_tx_resources(adapter
);
2493 /* allocate receive descriptors */
2494 err
= igb_setup_all_rx_resources(adapter
);
2498 igb_power_up_link(adapter
);
2500 /* before we allocate an interrupt, we must be ready to handle it.
2501 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2502 * as soon as we call pci_request_irq, so we have to setup our
2503 * clean_rx handler before we do so. */
2504 igb_configure(adapter
);
2506 err
= igb_request_irq(adapter
);
2510 /* From here on the code is the same as igb_up() */
2511 clear_bit(__IGB_DOWN
, &adapter
->state
);
2513 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
2514 napi_enable(&(adapter
->q_vector
[i
]->napi
));
2516 /* Clear any pending interrupts. */
2519 igb_irq_enable(adapter
);
2521 /* notify VFs that reset has been completed */
2522 if (adapter
->vfs_allocated_count
) {
2523 u32 reg_data
= rd32(E1000_CTRL_EXT
);
2524 reg_data
|= E1000_CTRL_EXT_PFRSTD
;
2525 wr32(E1000_CTRL_EXT
, reg_data
);
2528 netif_tx_start_all_queues(netdev
);
2530 /* start the watchdog. */
2531 hw
->mac
.get_link_status
= 1;
2532 schedule_work(&adapter
->watchdog_task
);
2537 igb_release_hw_control(adapter
);
2538 igb_power_down_link(adapter
);
2539 igb_free_all_rx_resources(adapter
);
2541 igb_free_all_tx_resources(adapter
);
2549 * igb_close - Disables a network interface
2550 * @netdev: network interface device structure
2552 * Returns 0, this is not allowed to fail
2554 * The close entry point is called when an interface is de-activated
2555 * by the OS. The hardware is still under the driver's control, but
2556 * needs to be disabled. A global MAC reset is issued to stop the
2557 * hardware, and all transmit and receive resources are freed.
2559 static int igb_close(struct net_device
*netdev
)
2561 struct igb_adapter
*adapter
= netdev_priv(netdev
);
2563 WARN_ON(test_bit(__IGB_RESETTING
, &adapter
->state
));
2566 igb_free_irq(adapter
);
2568 igb_free_all_tx_resources(adapter
);
2569 igb_free_all_rx_resources(adapter
);
2575 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2576 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2578 * Return 0 on success, negative on failure
2580 int igb_setup_tx_resources(struct igb_ring
*tx_ring
)
2582 struct device
*dev
= tx_ring
->dev
;
2583 int orig_node
= dev_to_node(dev
);
2586 size
= sizeof(struct igb_tx_buffer
) * tx_ring
->count
;
2587 tx_ring
->tx_buffer_info
= vzalloc_node(size
, tx_ring
->numa_node
);
2588 if (!tx_ring
->tx_buffer_info
)
2589 tx_ring
->tx_buffer_info
= vzalloc(size
);
2590 if (!tx_ring
->tx_buffer_info
)
2593 /* round up to nearest 4K */
2594 tx_ring
->size
= tx_ring
->count
* sizeof(union e1000_adv_tx_desc
);
2595 tx_ring
->size
= ALIGN(tx_ring
->size
, 4096);
2597 set_dev_node(dev
, tx_ring
->numa_node
);
2598 tx_ring
->desc
= dma_alloc_coherent(dev
,
2602 set_dev_node(dev
, orig_node
);
2604 tx_ring
->desc
= dma_alloc_coherent(dev
,
2612 tx_ring
->next_to_use
= 0;
2613 tx_ring
->next_to_clean
= 0;
2618 vfree(tx_ring
->tx_buffer_info
);
2620 "Unable to allocate memory for the transmit descriptor ring\n");
2625 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2626 * (Descriptors) for all queues
2627 * @adapter: board private structure
2629 * Return 0 on success, negative on failure
2631 static int igb_setup_all_tx_resources(struct igb_adapter
*adapter
)
2633 struct pci_dev
*pdev
= adapter
->pdev
;
2636 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
2637 err
= igb_setup_tx_resources(adapter
->tx_ring
[i
]);
2640 "Allocation for Tx Queue %u failed\n", i
);
2641 for (i
--; i
>= 0; i
--)
2642 igb_free_tx_resources(adapter
->tx_ring
[i
]);
2651 * igb_setup_tctl - configure the transmit control registers
2652 * @adapter: Board private structure
2654 void igb_setup_tctl(struct igb_adapter
*adapter
)
2656 struct e1000_hw
*hw
= &adapter
->hw
;
2659 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2660 wr32(E1000_TXDCTL(0), 0);
2662 /* Program the Transmit Control Register */
2663 tctl
= rd32(E1000_TCTL
);
2664 tctl
&= ~E1000_TCTL_CT
;
2665 tctl
|= E1000_TCTL_PSP
| E1000_TCTL_RTLC
|
2666 (E1000_COLLISION_THRESHOLD
<< E1000_CT_SHIFT
);
2668 igb_config_collision_dist(hw
);
2670 /* Enable transmits */
2671 tctl
|= E1000_TCTL_EN
;
2673 wr32(E1000_TCTL
, tctl
);
2677 * igb_configure_tx_ring - Configure transmit ring after Reset
2678 * @adapter: board private structure
2679 * @ring: tx ring to configure
2681 * Configure a transmit ring after a reset.
2683 void igb_configure_tx_ring(struct igb_adapter
*adapter
,
2684 struct igb_ring
*ring
)
2686 struct e1000_hw
*hw
= &adapter
->hw
;
2688 u64 tdba
= ring
->dma
;
2689 int reg_idx
= ring
->reg_idx
;
2691 /* disable the queue */
2692 wr32(E1000_TXDCTL(reg_idx
), 0);
2696 wr32(E1000_TDLEN(reg_idx
),
2697 ring
->count
* sizeof(union e1000_adv_tx_desc
));
2698 wr32(E1000_TDBAL(reg_idx
),
2699 tdba
& 0x00000000ffffffffULL
);
2700 wr32(E1000_TDBAH(reg_idx
), tdba
>> 32);
2702 ring
->tail
= hw
->hw_addr
+ E1000_TDT(reg_idx
);
2703 wr32(E1000_TDH(reg_idx
), 0);
2704 writel(0, ring
->tail
);
2706 txdctl
|= IGB_TX_PTHRESH
;
2707 txdctl
|= IGB_TX_HTHRESH
<< 8;
2708 txdctl
|= IGB_TX_WTHRESH
<< 16;
2710 txdctl
|= E1000_TXDCTL_QUEUE_ENABLE
;
2711 wr32(E1000_TXDCTL(reg_idx
), txdctl
);
2715 * igb_configure_tx - Configure transmit Unit after Reset
2716 * @adapter: board private structure
2718 * Configure the Tx unit of the MAC after a reset.
2720 static void igb_configure_tx(struct igb_adapter
*adapter
)
2724 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
2725 igb_configure_tx_ring(adapter
, adapter
->tx_ring
[i
]);
2729 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2730 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2732 * Returns 0 on success, negative on failure
2734 int igb_setup_rx_resources(struct igb_ring
*rx_ring
)
2736 struct device
*dev
= rx_ring
->dev
;
2737 int orig_node
= dev_to_node(dev
);
2740 size
= sizeof(struct igb_rx_buffer
) * rx_ring
->count
;
2741 rx_ring
->rx_buffer_info
= vzalloc_node(size
, rx_ring
->numa_node
);
2742 if (!rx_ring
->rx_buffer_info
)
2743 rx_ring
->rx_buffer_info
= vzalloc(size
);
2744 if (!rx_ring
->rx_buffer_info
)
2747 desc_len
= sizeof(union e1000_adv_rx_desc
);
2749 /* Round up to nearest 4K */
2750 rx_ring
->size
= rx_ring
->count
* desc_len
;
2751 rx_ring
->size
= ALIGN(rx_ring
->size
, 4096);
2753 set_dev_node(dev
, rx_ring
->numa_node
);
2754 rx_ring
->desc
= dma_alloc_coherent(dev
,
2758 set_dev_node(dev
, orig_node
);
2760 rx_ring
->desc
= dma_alloc_coherent(dev
,
2768 rx_ring
->next_to_clean
= 0;
2769 rx_ring
->next_to_use
= 0;
2774 vfree(rx_ring
->rx_buffer_info
);
2775 rx_ring
->rx_buffer_info
= NULL
;
2776 dev_err(dev
, "Unable to allocate memory for the receive descriptor"
2782 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2783 * (Descriptors) for all queues
2784 * @adapter: board private structure
2786 * Return 0 on success, negative on failure
2788 static int igb_setup_all_rx_resources(struct igb_adapter
*adapter
)
2790 struct pci_dev
*pdev
= adapter
->pdev
;
2793 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
2794 err
= igb_setup_rx_resources(adapter
->rx_ring
[i
]);
2797 "Allocation for Rx Queue %u failed\n", i
);
2798 for (i
--; i
>= 0; i
--)
2799 igb_free_rx_resources(adapter
->rx_ring
[i
]);
2808 * igb_setup_mrqc - configure the multiple receive queue control registers
2809 * @adapter: Board private structure
2811 static void igb_setup_mrqc(struct igb_adapter
*adapter
)
2813 struct e1000_hw
*hw
= &adapter
->hw
;
2815 u32 j
, num_rx_queues
, shift
= 0, shift2
= 0;
2820 static const u8 rsshash
[40] = {
2821 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2822 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2823 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2824 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2826 /* Fill out hash function seeds */
2827 for (j
= 0; j
< 10; j
++) {
2828 u32 rsskey
= rsshash
[(j
* 4)];
2829 rsskey
|= rsshash
[(j
* 4) + 1] << 8;
2830 rsskey
|= rsshash
[(j
* 4) + 2] << 16;
2831 rsskey
|= rsshash
[(j
* 4) + 3] << 24;
2832 array_wr32(E1000_RSSRK(0), j
, rsskey
);
2835 num_rx_queues
= adapter
->rss_queues
;
2837 if (adapter
->vfs_allocated_count
) {
2838 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2839 switch (hw
->mac
.type
) {
2856 if (hw
->mac
.type
== e1000_82575
)
2860 for (j
= 0; j
< (32 * 4); j
++) {
2861 reta
.bytes
[j
& 3] = (j
% num_rx_queues
) << shift
;
2863 reta
.bytes
[j
& 3] |= num_rx_queues
<< shift2
;
2865 wr32(E1000_RETA(j
>> 2), reta
.dword
);
2869 * Disable raw packet checksumming so that RSS hash is placed in
2870 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2871 * offloads as they are enabled by default
2873 rxcsum
= rd32(E1000_RXCSUM
);
2874 rxcsum
|= E1000_RXCSUM_PCSD
;
2876 if (adapter
->hw
.mac
.type
>= e1000_82576
)
2877 /* Enable Receive Checksum Offload for SCTP */
2878 rxcsum
|= E1000_RXCSUM_CRCOFL
;
2880 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2881 wr32(E1000_RXCSUM
, rxcsum
);
2883 /* If VMDq is enabled then we set the appropriate mode for that, else
2884 * we default to RSS so that an RSS hash is calculated per packet even
2885 * if we are only using one queue */
2886 if (adapter
->vfs_allocated_count
) {
2887 if (hw
->mac
.type
> e1000_82575
) {
2888 /* Set the default pool for the PF's first queue */
2889 u32 vtctl
= rd32(E1000_VT_CTL
);
2890 vtctl
&= ~(E1000_VT_CTL_DEFAULT_POOL_MASK
|
2891 E1000_VT_CTL_DISABLE_DEF_POOL
);
2892 vtctl
|= adapter
->vfs_allocated_count
<<
2893 E1000_VT_CTL_DEFAULT_POOL_SHIFT
;
2894 wr32(E1000_VT_CTL
, vtctl
);
2896 if (adapter
->rss_queues
> 1)
2897 mrqc
= E1000_MRQC_ENABLE_VMDQ_RSS_2Q
;
2899 mrqc
= E1000_MRQC_ENABLE_VMDQ
;
2901 mrqc
= E1000_MRQC_ENABLE_RSS_4Q
;
2903 igb_vmm_control(adapter
);
2906 * Generate RSS hash based on TCP port numbers and/or
2907 * IPv4/v6 src and dst addresses since UDP cannot be
2908 * hashed reliably due to IP fragmentation
2910 mrqc
|= E1000_MRQC_RSS_FIELD_IPV4
|
2911 E1000_MRQC_RSS_FIELD_IPV4_TCP
|
2912 E1000_MRQC_RSS_FIELD_IPV6
|
2913 E1000_MRQC_RSS_FIELD_IPV6_TCP
|
2914 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX
;
2916 wr32(E1000_MRQC
, mrqc
);
2920 * igb_setup_rctl - configure the receive control registers
2921 * @adapter: Board private structure
2923 void igb_setup_rctl(struct igb_adapter
*adapter
)
2925 struct e1000_hw
*hw
= &adapter
->hw
;
2928 rctl
= rd32(E1000_RCTL
);
2930 rctl
&= ~(3 << E1000_RCTL_MO_SHIFT
);
2931 rctl
&= ~(E1000_RCTL_LBM_TCVR
| E1000_RCTL_LBM_MAC
);
2933 rctl
|= E1000_RCTL_EN
| E1000_RCTL_BAM
| E1000_RCTL_RDMTS_HALF
|
2934 (hw
->mac
.mc_filter_type
<< E1000_RCTL_MO_SHIFT
);
2937 * enable stripping of CRC. It's unlikely this will break BMC
2938 * redirection as it did with e1000. Newer features require
2939 * that the HW strips the CRC.
2941 rctl
|= E1000_RCTL_SECRC
;
2943 /* disable store bad packets and clear size bits. */
2944 rctl
&= ~(E1000_RCTL_SBP
| E1000_RCTL_SZ_256
);
2946 /* enable LPE to prevent packets larger than max_frame_size */
2947 rctl
|= E1000_RCTL_LPE
;
2949 /* disable queue 0 to prevent tail write w/o re-config */
2950 wr32(E1000_RXDCTL(0), 0);
2952 /* Attention!!! For SR-IOV PF driver operations you must enable
2953 * queue drop for all VF and PF queues to prevent head of line blocking
2954 * if an un-trusted VF does not provide descriptors to hardware.
2956 if (adapter
->vfs_allocated_count
) {
2957 /* set all queue drop enable bits */
2958 wr32(E1000_QDE
, ALL_QUEUES
);
2961 wr32(E1000_RCTL
, rctl
);
2964 static inline int igb_set_vf_rlpml(struct igb_adapter
*adapter
, int size
,
2967 struct e1000_hw
*hw
= &adapter
->hw
;
2970 /* if it isn't the PF check to see if VFs are enabled and
2971 * increase the size to support vlan tags */
2972 if (vfn
< adapter
->vfs_allocated_count
&&
2973 adapter
->vf_data
[vfn
].vlans_enabled
)
2974 size
+= VLAN_TAG_SIZE
;
2976 vmolr
= rd32(E1000_VMOLR(vfn
));
2977 vmolr
&= ~E1000_VMOLR_RLPML_MASK
;
2978 vmolr
|= size
| E1000_VMOLR_LPE
;
2979 wr32(E1000_VMOLR(vfn
), vmolr
);
2985 * igb_rlpml_set - set maximum receive packet size
2986 * @adapter: board private structure
2988 * Configure maximum receivable packet size.
2990 static void igb_rlpml_set(struct igb_adapter
*adapter
)
2992 u32 max_frame_size
= adapter
->max_frame_size
;
2993 struct e1000_hw
*hw
= &adapter
->hw
;
2994 u16 pf_id
= adapter
->vfs_allocated_count
;
2997 igb_set_vf_rlpml(adapter
, max_frame_size
, pf_id
);
2999 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3000 * to our max jumbo frame size, in case we need to enable
3001 * jumbo frames on one of the rings later.
3002 * This will not pass over-length frames into the default
3003 * queue because it's gated by the VMOLR.RLPML.
3005 max_frame_size
= MAX_JUMBO_FRAME_SIZE
;
3008 wr32(E1000_RLPML
, max_frame_size
);
3011 static inline void igb_set_vmolr(struct igb_adapter
*adapter
,
3014 struct e1000_hw
*hw
= &adapter
->hw
;
3018 * This register exists only on 82576 and newer so if we are older then
3019 * we should exit and do nothing
3021 if (hw
->mac
.type
< e1000_82576
)
3024 vmolr
= rd32(E1000_VMOLR(vfn
));
3025 vmolr
|= E1000_VMOLR_STRVLAN
; /* Strip vlan tags */
3027 vmolr
|= E1000_VMOLR_AUPE
; /* Accept untagged packets */
3029 vmolr
&= ~(E1000_VMOLR_AUPE
); /* Tagged packets ONLY */
3031 /* clear all bits that might not be set */
3032 vmolr
&= ~(E1000_VMOLR_BAM
| E1000_VMOLR_RSSE
);
3034 if (adapter
->rss_queues
> 1 && vfn
== adapter
->vfs_allocated_count
)
3035 vmolr
|= E1000_VMOLR_RSSE
; /* enable RSS */
3037 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3040 if (vfn
<= adapter
->vfs_allocated_count
)
3041 vmolr
|= E1000_VMOLR_BAM
; /* Accept broadcast */
3043 wr32(E1000_VMOLR(vfn
), vmolr
);
3047 * igb_configure_rx_ring - Configure a receive ring after Reset
3048 * @adapter: board private structure
3049 * @ring: receive ring to be configured
3051 * Configure the Rx unit of the MAC after a reset.
3053 void igb_configure_rx_ring(struct igb_adapter
*adapter
,
3054 struct igb_ring
*ring
)
3056 struct e1000_hw
*hw
= &adapter
->hw
;
3057 u64 rdba
= ring
->dma
;
3058 int reg_idx
= ring
->reg_idx
;
3059 u32 srrctl
= 0, rxdctl
= 0;
3061 /* disable the queue */
3062 wr32(E1000_RXDCTL(reg_idx
), 0);
3064 /* Set DMA base address registers */
3065 wr32(E1000_RDBAL(reg_idx
),
3066 rdba
& 0x00000000ffffffffULL
);
3067 wr32(E1000_RDBAH(reg_idx
), rdba
>> 32);
3068 wr32(E1000_RDLEN(reg_idx
),
3069 ring
->count
* sizeof(union e1000_adv_rx_desc
));
3071 /* initialize head and tail */
3072 ring
->tail
= hw
->hw_addr
+ E1000_RDT(reg_idx
);
3073 wr32(E1000_RDH(reg_idx
), 0);
3074 writel(0, ring
->tail
);
3076 /* set descriptor configuration */
3077 srrctl
= IGB_RX_HDR_LEN
<< E1000_SRRCTL_BSIZEHDRSIZE_SHIFT
;
3078 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3079 srrctl
|= IGB_RXBUFFER_16384
>> E1000_SRRCTL_BSIZEPKT_SHIFT
;
3081 srrctl
|= (PAGE_SIZE
/ 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT
;
3083 srrctl
|= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS
;
3084 if (hw
->mac
.type
>= e1000_82580
)
3085 srrctl
|= E1000_SRRCTL_TIMESTAMP
;
3086 /* Only set Drop Enable if we are supporting multiple queues */
3087 if (adapter
->vfs_allocated_count
|| adapter
->num_rx_queues
> 1)
3088 srrctl
|= E1000_SRRCTL_DROP_EN
;
3090 wr32(E1000_SRRCTL(reg_idx
), srrctl
);
3092 /* set filtering for VMDQ pools */
3093 igb_set_vmolr(adapter
, reg_idx
& 0x7, true);
3095 rxdctl
|= IGB_RX_PTHRESH
;
3096 rxdctl
|= IGB_RX_HTHRESH
<< 8;
3097 rxdctl
|= IGB_RX_WTHRESH
<< 16;
3099 /* enable receive descriptor fetching */
3100 rxdctl
|= E1000_RXDCTL_QUEUE_ENABLE
;
3101 wr32(E1000_RXDCTL(reg_idx
), rxdctl
);
3105 * igb_configure_rx - Configure receive Unit after Reset
3106 * @adapter: board private structure
3108 * Configure the Rx unit of the MAC after a reset.
3110 static void igb_configure_rx(struct igb_adapter
*adapter
)
3114 /* set UTA to appropriate mode */
3115 igb_set_uta(adapter
);
3117 /* set the correct pool for the PF default MAC address in entry 0 */
3118 igb_rar_set_qsel(adapter
, adapter
->hw
.mac
.addr
, 0,
3119 adapter
->vfs_allocated_count
);
3121 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3122 * the Base and Length of the Rx Descriptor Ring */
3123 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3124 igb_configure_rx_ring(adapter
, adapter
->rx_ring
[i
]);
3128 * igb_free_tx_resources - Free Tx Resources per Queue
3129 * @tx_ring: Tx descriptor ring for a specific queue
3131 * Free all transmit software resources
3133 void igb_free_tx_resources(struct igb_ring
*tx_ring
)
3135 igb_clean_tx_ring(tx_ring
);
3137 vfree(tx_ring
->tx_buffer_info
);
3138 tx_ring
->tx_buffer_info
= NULL
;
3140 /* if not set, then don't free */
3144 dma_free_coherent(tx_ring
->dev
, tx_ring
->size
,
3145 tx_ring
->desc
, tx_ring
->dma
);
3147 tx_ring
->desc
= NULL
;
3151 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3152 * @adapter: board private structure
3154 * Free all transmit software resources
3156 static void igb_free_all_tx_resources(struct igb_adapter
*adapter
)
3160 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
3161 igb_free_tx_resources(adapter
->tx_ring
[i
]);
3164 void igb_unmap_and_free_tx_resource(struct igb_ring
*ring
,
3165 struct igb_tx_buffer
*tx_buffer
)
3167 if (tx_buffer
->skb
) {
3168 dev_kfree_skb_any(tx_buffer
->skb
);
3170 dma_unmap_single(ring
->dev
,
3174 } else if (tx_buffer
->dma
) {
3175 dma_unmap_page(ring
->dev
,
3180 tx_buffer
->next_to_watch
= NULL
;
3181 tx_buffer
->skb
= NULL
;
3183 /* buffer_info must be completely set up in the transmit path */
3187 * igb_clean_tx_ring - Free Tx Buffers
3188 * @tx_ring: ring to be cleaned
3190 static void igb_clean_tx_ring(struct igb_ring
*tx_ring
)
3192 struct igb_tx_buffer
*buffer_info
;
3196 if (!tx_ring
->tx_buffer_info
)
3198 /* Free all the Tx ring sk_buffs */
3200 for (i
= 0; i
< tx_ring
->count
; i
++) {
3201 buffer_info
= &tx_ring
->tx_buffer_info
[i
];
3202 igb_unmap_and_free_tx_resource(tx_ring
, buffer_info
);
3205 size
= sizeof(struct igb_tx_buffer
) * tx_ring
->count
;
3206 memset(tx_ring
->tx_buffer_info
, 0, size
);
3208 /* Zero out the descriptor ring */
3209 memset(tx_ring
->desc
, 0, tx_ring
->size
);
3211 tx_ring
->next_to_use
= 0;
3212 tx_ring
->next_to_clean
= 0;
3216 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3217 * @adapter: board private structure
3219 static void igb_clean_all_tx_rings(struct igb_adapter
*adapter
)
3223 for (i
= 0; i
< adapter
->num_tx_queues
; i
++)
3224 igb_clean_tx_ring(adapter
->tx_ring
[i
]);
3228 * igb_free_rx_resources - Free Rx Resources
3229 * @rx_ring: ring to clean the resources from
3231 * Free all receive software resources
3233 void igb_free_rx_resources(struct igb_ring
*rx_ring
)
3235 igb_clean_rx_ring(rx_ring
);
3237 vfree(rx_ring
->rx_buffer_info
);
3238 rx_ring
->rx_buffer_info
= NULL
;
3240 /* if not set, then don't free */
3244 dma_free_coherent(rx_ring
->dev
, rx_ring
->size
,
3245 rx_ring
->desc
, rx_ring
->dma
);
3247 rx_ring
->desc
= NULL
;
3251 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3252 * @adapter: board private structure
3254 * Free all receive software resources
3256 static void igb_free_all_rx_resources(struct igb_adapter
*adapter
)
3260 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3261 igb_free_rx_resources(adapter
->rx_ring
[i
]);
3265 * igb_clean_rx_ring - Free Rx Buffers per Queue
3266 * @rx_ring: ring to free buffers from
3268 static void igb_clean_rx_ring(struct igb_ring
*rx_ring
)
3273 if (!rx_ring
->rx_buffer_info
)
3276 /* Free all the Rx ring sk_buffs */
3277 for (i
= 0; i
< rx_ring
->count
; i
++) {
3278 struct igb_rx_buffer
*buffer_info
= &rx_ring
->rx_buffer_info
[i
];
3279 if (buffer_info
->dma
) {
3280 dma_unmap_single(rx_ring
->dev
,
3284 buffer_info
->dma
= 0;
3287 if (buffer_info
->skb
) {
3288 dev_kfree_skb(buffer_info
->skb
);
3289 buffer_info
->skb
= NULL
;
3291 if (buffer_info
->page_dma
) {
3292 dma_unmap_page(rx_ring
->dev
,
3293 buffer_info
->page_dma
,
3296 buffer_info
->page_dma
= 0;
3298 if (buffer_info
->page
) {
3299 put_page(buffer_info
->page
);
3300 buffer_info
->page
= NULL
;
3301 buffer_info
->page_offset
= 0;
3305 size
= sizeof(struct igb_rx_buffer
) * rx_ring
->count
;
3306 memset(rx_ring
->rx_buffer_info
, 0, size
);
3308 /* Zero out the descriptor ring */
3309 memset(rx_ring
->desc
, 0, rx_ring
->size
);
3311 rx_ring
->next_to_clean
= 0;
3312 rx_ring
->next_to_use
= 0;
3316 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3317 * @adapter: board private structure
3319 static void igb_clean_all_rx_rings(struct igb_adapter
*adapter
)
3323 for (i
= 0; i
< adapter
->num_rx_queues
; i
++)
3324 igb_clean_rx_ring(adapter
->rx_ring
[i
]);
3328 * igb_set_mac - Change the Ethernet Address of the NIC
3329 * @netdev: network interface device structure
3330 * @p: pointer to an address structure
3332 * Returns 0 on success, negative on failure
3334 static int igb_set_mac(struct net_device
*netdev
, void *p
)
3336 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3337 struct e1000_hw
*hw
= &adapter
->hw
;
3338 struct sockaddr
*addr
= p
;
3340 if (!is_valid_ether_addr(addr
->sa_data
))
3341 return -EADDRNOTAVAIL
;
3343 memcpy(netdev
->dev_addr
, addr
->sa_data
, netdev
->addr_len
);
3344 memcpy(hw
->mac
.addr
, addr
->sa_data
, netdev
->addr_len
);
3346 /* set the correct pool for the new PF MAC address in entry 0 */
3347 igb_rar_set_qsel(adapter
, hw
->mac
.addr
, 0,
3348 adapter
->vfs_allocated_count
);
3354 * igb_write_mc_addr_list - write multicast addresses to MTA
3355 * @netdev: network interface device structure
3357 * Writes multicast address list to the MTA hash table.
3358 * Returns: -ENOMEM on failure
3359 * 0 on no addresses written
3360 * X on writing X addresses to MTA
3362 static int igb_write_mc_addr_list(struct net_device
*netdev
)
3364 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3365 struct e1000_hw
*hw
= &adapter
->hw
;
3366 struct netdev_hw_addr
*ha
;
3370 if (netdev_mc_empty(netdev
)) {
3371 /* nothing to program, so clear mc list */
3372 igb_update_mc_addr_list(hw
, NULL
, 0);
3373 igb_restore_vf_multicasts(adapter
);
3377 mta_list
= kzalloc(netdev_mc_count(netdev
) * 6, GFP_ATOMIC
);
3381 /* The shared function expects a packed array of only addresses. */
3383 netdev_for_each_mc_addr(ha
, netdev
)
3384 memcpy(mta_list
+ (i
++ * ETH_ALEN
), ha
->addr
, ETH_ALEN
);
3386 igb_update_mc_addr_list(hw
, mta_list
, i
);
3389 return netdev_mc_count(netdev
);
3393 * igb_write_uc_addr_list - write unicast addresses to RAR table
3394 * @netdev: network interface device structure
3396 * Writes unicast address list to the RAR table.
3397 * Returns: -ENOMEM on failure/insufficient address space
3398 * 0 on no addresses written
3399 * X on writing X addresses to the RAR table
3401 static int igb_write_uc_addr_list(struct net_device
*netdev
)
3403 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3404 struct e1000_hw
*hw
= &adapter
->hw
;
3405 unsigned int vfn
= adapter
->vfs_allocated_count
;
3406 unsigned int rar_entries
= hw
->mac
.rar_entry_count
- (vfn
+ 1);
3409 /* return ENOMEM indicating insufficient memory for addresses */
3410 if (netdev_uc_count(netdev
) > rar_entries
)
3413 if (!netdev_uc_empty(netdev
) && rar_entries
) {
3414 struct netdev_hw_addr
*ha
;
3416 netdev_for_each_uc_addr(ha
, netdev
) {
3419 igb_rar_set_qsel(adapter
, ha
->addr
,
3425 /* write the addresses in reverse order to avoid write combining */
3426 for (; rar_entries
> 0 ; rar_entries
--) {
3427 wr32(E1000_RAH(rar_entries
), 0);
3428 wr32(E1000_RAL(rar_entries
), 0);
3436 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3437 * @netdev: network interface device structure
3439 * The set_rx_mode entry point is called whenever the unicast or multicast
3440 * address lists or the network interface flags are updated. This routine is
3441 * responsible for configuring the hardware for proper unicast, multicast,
3442 * promiscuous mode, and all-multi behavior.
3444 static void igb_set_rx_mode(struct net_device
*netdev
)
3446 struct igb_adapter
*adapter
= netdev_priv(netdev
);
3447 struct e1000_hw
*hw
= &adapter
->hw
;
3448 unsigned int vfn
= adapter
->vfs_allocated_count
;
3449 u32 rctl
, vmolr
= 0;
3452 /* Check for Promiscuous and All Multicast modes */
3453 rctl
= rd32(E1000_RCTL
);
3455 /* clear the effected bits */
3456 rctl
&= ~(E1000_RCTL_UPE
| E1000_RCTL_MPE
| E1000_RCTL_VFE
);
3458 if (netdev
->flags
& IFF_PROMISC
) {
3459 rctl
|= (E1000_RCTL_UPE
| E1000_RCTL_MPE
);
3460 vmolr
|= (E1000_VMOLR_ROPE
| E1000_VMOLR_MPME
);
3462 if (netdev
->flags
& IFF_ALLMULTI
) {
3463 rctl
|= E1000_RCTL_MPE
;
3464 vmolr
|= E1000_VMOLR_MPME
;
3467 * Write addresses to the MTA, if the attempt fails
3468 * then we should just turn on promiscuous mode so
3469 * that we can at least receive multicast traffic
3471 count
= igb_write_mc_addr_list(netdev
);
3473 rctl
|= E1000_RCTL_MPE
;
3474 vmolr
|= E1000_VMOLR_MPME
;
3476 vmolr
|= E1000_VMOLR_ROMPE
;
3480 * Write addresses to available RAR registers, if there is not
3481 * sufficient space to store all the addresses then enable
3482 * unicast promiscuous mode
3484 count
= igb_write_uc_addr_list(netdev
);
3486 rctl
|= E1000_RCTL_UPE
;
3487 vmolr
|= E1000_VMOLR_ROPE
;
3489 rctl
|= E1000_RCTL_VFE
;
3491 wr32(E1000_RCTL
, rctl
);
3494 * In order to support SR-IOV and eventually VMDq it is necessary to set
3495 * the VMOLR to enable the appropriate modes. Without this workaround
3496 * we will have issues with VLAN tag stripping not being done for frames
3497 * that are only arriving because we are the default pool
3499 if (hw
->mac
.type
< e1000_82576
)
3502 vmolr
|= rd32(E1000_VMOLR(vfn
)) &
3503 ~(E1000_VMOLR_ROPE
| E1000_VMOLR_MPME
| E1000_VMOLR_ROMPE
);
3504 wr32(E1000_VMOLR(vfn
), vmolr
);
3505 igb_restore_vf_multicasts(adapter
);
3508 static void igb_check_wvbr(struct igb_adapter
*adapter
)
3510 struct e1000_hw
*hw
= &adapter
->hw
;
3513 switch (hw
->mac
.type
) {
3516 if (!(wvbr
= rd32(E1000_WVBR
)))
3523 adapter
->wvbr
|= wvbr
;
3526 #define IGB_STAGGERED_QUEUE_OFFSET 8
3528 static void igb_spoof_check(struct igb_adapter
*adapter
)
3535 for(j
= 0; j
< adapter
->vfs_allocated_count
; j
++) {
3536 if (adapter
->wvbr
& (1 << j
) ||
3537 adapter
->wvbr
& (1 << (j
+ IGB_STAGGERED_QUEUE_OFFSET
))) {
3538 dev_warn(&adapter
->pdev
->dev
,
3539 "Spoof event(s) detected on VF %d\n", j
);
3542 (1 << (j
+ IGB_STAGGERED_QUEUE_OFFSET
)));
3547 /* Need to wait a few seconds after link up to get diagnostic information from
3549 static void igb_update_phy_info(unsigned long data
)
3551 struct igb_adapter
*adapter
= (struct igb_adapter
*) data
;
3552 igb_get_phy_info(&adapter
->hw
);
3556 * igb_has_link - check shared code for link and determine up/down
3557 * @adapter: pointer to driver private info
3559 bool igb_has_link(struct igb_adapter
*adapter
)
3561 struct e1000_hw
*hw
= &adapter
->hw
;
3562 bool link_active
= false;
3565 /* get_link_status is set on LSC (link status) interrupt or
3566 * rx sequence error interrupt. get_link_status will stay
3567 * false until the e1000_check_for_link establishes link
3568 * for copper adapters ONLY
3570 switch (hw
->phy
.media_type
) {
3571 case e1000_media_type_copper
:
3572 if (hw
->mac
.get_link_status
) {
3573 ret_val
= hw
->mac
.ops
.check_for_link(hw
);
3574 link_active
= !hw
->mac
.get_link_status
;
3579 case e1000_media_type_internal_serdes
:
3580 ret_val
= hw
->mac
.ops
.check_for_link(hw
);
3581 link_active
= hw
->mac
.serdes_has_link
;
3584 case e1000_media_type_unknown
:
3591 static bool igb_thermal_sensor_event(struct e1000_hw
*hw
, u32 event
)
3594 u32 ctrl_ext
, thstat
;
3596 /* check for thermal sensor event on i350, copper only */
3597 if (hw
->mac
.type
== e1000_i350
) {
3598 thstat
= rd32(E1000_THSTAT
);
3599 ctrl_ext
= rd32(E1000_CTRL_EXT
);
3601 if ((hw
->phy
.media_type
== e1000_media_type_copper
) &&
3602 !(ctrl_ext
& E1000_CTRL_EXT_LINK_MODE_SGMII
)) {
3603 ret
= !!(thstat
& event
);
3611 * igb_watchdog - Timer Call-back
3612 * @data: pointer to adapter cast into an unsigned long
3614 static void igb_watchdog(unsigned long data
)
3616 struct igb_adapter
*adapter
= (struct igb_adapter
*)data
;
3617 /* Do the rest outside of interrupt context */
3618 schedule_work(&adapter
->watchdog_task
);
3621 static void igb_watchdog_task(struct work_struct
*work
)
3623 struct igb_adapter
*adapter
= container_of(work
,
3626 struct e1000_hw
*hw
= &adapter
->hw
;
3627 struct net_device
*netdev
= adapter
->netdev
;
3631 link
= igb_has_link(adapter
);
3633 if (!netif_carrier_ok(netdev
)) {
3635 hw
->mac
.ops
.get_speed_and_duplex(hw
,
3636 &adapter
->link_speed
,
3637 &adapter
->link_duplex
);
3639 ctrl
= rd32(E1000_CTRL
);
3640 /* Links status message must follow this format */
3641 printk(KERN_INFO
"igb: %s NIC Link is Up %d Mbps %s "
3642 "Duplex, Flow Control: %s\n",
3644 adapter
->link_speed
,
3645 adapter
->link_duplex
== FULL_DUPLEX
?
3647 (ctrl
& E1000_CTRL_TFCE
) &&
3648 (ctrl
& E1000_CTRL_RFCE
) ? "RX/TX" :
3649 (ctrl
& E1000_CTRL_RFCE
) ? "RX" :
3650 (ctrl
& E1000_CTRL_TFCE
) ? "TX" : "None");
3652 /* check for thermal sensor event */
3653 if (igb_thermal_sensor_event(hw
,
3654 E1000_THSTAT_LINK_THROTTLE
)) {
3655 netdev_info(netdev
, "The network adapter link "
3656 "speed was downshifted because it "
3660 /* adjust timeout factor according to speed/duplex */
3661 adapter
->tx_timeout_factor
= 1;
3662 switch (adapter
->link_speed
) {
3664 adapter
->tx_timeout_factor
= 14;
3667 /* maybe add some timeout factor ? */
3671 netif_carrier_on(netdev
);
3673 igb_ping_all_vfs(adapter
);
3674 igb_check_vf_rate_limit(adapter
);
3676 /* link state has changed, schedule phy info update */
3677 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3678 mod_timer(&adapter
->phy_info_timer
,
3679 round_jiffies(jiffies
+ 2 * HZ
));
3682 if (netif_carrier_ok(netdev
)) {
3683 adapter
->link_speed
= 0;
3684 adapter
->link_duplex
= 0;
3686 /* check for thermal sensor event */
3687 if (igb_thermal_sensor_event(hw
,
3688 E1000_THSTAT_PWR_DOWN
)) {
3689 netdev_err(netdev
, "The network adapter was "
3690 "stopped because it overheated\n");
3693 /* Links status message must follow this format */
3694 printk(KERN_INFO
"igb: %s NIC Link is Down\n",
3696 netif_carrier_off(netdev
);
3698 igb_ping_all_vfs(adapter
);
3700 /* link state has changed, schedule phy info update */
3701 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3702 mod_timer(&adapter
->phy_info_timer
,
3703 round_jiffies(jiffies
+ 2 * HZ
));
3707 spin_lock(&adapter
->stats64_lock
);
3708 igb_update_stats(adapter
, &adapter
->stats64
);
3709 spin_unlock(&adapter
->stats64_lock
);
3711 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
3712 struct igb_ring
*tx_ring
= adapter
->tx_ring
[i
];
3713 if (!netif_carrier_ok(netdev
)) {
3714 /* We've lost link, so the controller stops DMA,
3715 * but we've got queued Tx work that's never going
3716 * to get done, so reset controller to flush Tx.
3717 * (Do the reset outside of interrupt context). */
3718 if (igb_desc_unused(tx_ring
) + 1 < tx_ring
->count
) {
3719 adapter
->tx_timeout_count
++;
3720 schedule_work(&adapter
->reset_task
);
3721 /* return immediately since reset is imminent */
3726 /* Force detection of hung controller every watchdog period */
3727 set_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
);
3730 /* Cause software interrupt to ensure rx ring is cleaned */
3731 if (adapter
->msix_entries
) {
3733 for (i
= 0; i
< adapter
->num_q_vectors
; i
++)
3734 eics
|= adapter
->q_vector
[i
]->eims_value
;
3735 wr32(E1000_EICS
, eics
);
3737 wr32(E1000_ICS
, E1000_ICS_RXDMT0
);
3740 igb_spoof_check(adapter
);
3742 /* Reset the timer */
3743 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
3744 mod_timer(&adapter
->watchdog_timer
,
3745 round_jiffies(jiffies
+ 2 * HZ
));
/* Traffic classes used by the dynamic interrupt-moderation (ITR) logic */
enum latency_range {
	lowest_latency = 0,
	low_latency = 1,
	bulk_latency = 2,
	latency_invalid = 255
};
3756 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3758 * Stores a new ITR value based on strictly on packet size. This
3759 * algorithm is less sophisticated than that used in igb_update_itr,
3760 * due to the difficulty of synchronizing statistics across multiple
3761 * receive rings. The divisors and thresholds used by this function
3762 * were determined based on theoretical maximum wire speed and testing
3763 * data, in order to minimize response time while increasing bulk
3765 * This functionality is controlled by the InterruptThrottleRate module
3766 * parameter (see igb_param.c)
3767 * NOTE: This function is called only when operating in a multiqueue
3768 * receive environment.
3769 * @q_vector: pointer to q_vector
3771 static void igb_update_ring_itr(struct igb_q_vector
*q_vector
)
3773 int new_val
= q_vector
->itr_val
;
3774 int avg_wire_size
= 0;
3775 struct igb_adapter
*adapter
= q_vector
->adapter
;
3776 unsigned int packets
;
3778 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3779 * ints/sec - ITR timer value of 120 ticks.
3781 if (adapter
->link_speed
!= SPEED_1000
) {
3782 new_val
= IGB_4K_ITR
;
3786 packets
= q_vector
->rx
.total_packets
;
3788 avg_wire_size
= q_vector
->rx
.total_bytes
/ packets
;
3790 packets
= q_vector
->tx
.total_packets
;
3792 avg_wire_size
= max_t(u32
, avg_wire_size
,
3793 q_vector
->tx
.total_bytes
/ packets
);
3795 /* if avg_wire_size isn't set no work was done */
3799 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3800 avg_wire_size
+= 24;
3802 /* Don't starve jumbo frames */
3803 avg_wire_size
= min(avg_wire_size
, 3000);
3805 /* Give a little boost to mid-size frames */
3806 if ((avg_wire_size
> 300) && (avg_wire_size
< 1200))
3807 new_val
= avg_wire_size
/ 3;
3809 new_val
= avg_wire_size
/ 2;
3811 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3812 if (new_val
< IGB_20K_ITR
&&
3813 ((q_vector
->rx
.ring
&& adapter
->rx_itr_setting
== 3) ||
3814 (!q_vector
->rx
.ring
&& adapter
->tx_itr_setting
== 3)))
3815 new_val
= IGB_20K_ITR
;
3818 if (new_val
!= q_vector
->itr_val
) {
3819 q_vector
->itr_val
= new_val
;
3820 q_vector
->set_itr
= 1;
3823 q_vector
->rx
.total_bytes
= 0;
3824 q_vector
->rx
.total_packets
= 0;
3825 q_vector
->tx
.total_bytes
= 0;
3826 q_vector
->tx
.total_packets
= 0;
3830 * igb_update_itr - update the dynamic ITR value based on statistics
3831 * Stores a new ITR value based on packets and byte
3832 * counts during the last interrupt. The advantage of per interrupt
3833 * computation is faster updates and more accurate ITR for the current
3834 * traffic pattern. Constants in this function were computed
3835 * based on theoretical maximum wire speed and thresholds were set based
3836 * on testing data as well as attempting to minimize response time
3837 * while increasing bulk throughput.
3838 * this functionality is controlled by the InterruptThrottleRate module
3839 * parameter (see igb_param.c)
3840 * NOTE: These calculations are only valid when operating in a single-
3841 * queue environment.
3842 * @q_vector: pointer to q_vector
3843 * @ring_container: ring info to update the itr for
3845 static void igb_update_itr(struct igb_q_vector
*q_vector
,
3846 struct igb_ring_container
*ring_container
)
3848 unsigned int packets
= ring_container
->total_packets
;
3849 unsigned int bytes
= ring_container
->total_bytes
;
3850 u8 itrval
= ring_container
->itr
;
3852 /* no packets, exit with status unchanged */
3857 case lowest_latency
:
3858 /* handle TSO and jumbo frames */
3859 if (bytes
/packets
> 8000)
3860 itrval
= bulk_latency
;
3861 else if ((packets
< 5) && (bytes
> 512))
3862 itrval
= low_latency
;
3864 case low_latency
: /* 50 usec aka 20000 ints/s */
3865 if (bytes
> 10000) {
3866 /* this if handles the TSO accounting */
3867 if (bytes
/packets
> 8000) {
3868 itrval
= bulk_latency
;
3869 } else if ((packets
< 10) || ((bytes
/packets
) > 1200)) {
3870 itrval
= bulk_latency
;
3871 } else if ((packets
> 35)) {
3872 itrval
= lowest_latency
;
3874 } else if (bytes
/packets
> 2000) {
3875 itrval
= bulk_latency
;
3876 } else if (packets
<= 2 && bytes
< 512) {
3877 itrval
= lowest_latency
;
3880 case bulk_latency
: /* 250 usec aka 4000 ints/s */
3881 if (bytes
> 25000) {
3883 itrval
= low_latency
;
3884 } else if (bytes
< 1500) {
3885 itrval
= low_latency
;
3890 /* clear work counters since we have the values we need */
3891 ring_container
->total_bytes
= 0;
3892 ring_container
->total_packets
= 0;
3894 /* write updated itr to ring container */
3895 ring_container
->itr
= itrval
;
3898 static void igb_set_itr(struct igb_q_vector
*q_vector
)
3900 struct igb_adapter
*adapter
= q_vector
->adapter
;
3901 u32 new_itr
= q_vector
->itr_val
;
3904 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3905 if (adapter
->link_speed
!= SPEED_1000
) {
3907 new_itr
= IGB_4K_ITR
;
3911 igb_update_itr(q_vector
, &q_vector
->tx
);
3912 igb_update_itr(q_vector
, &q_vector
->rx
);
3914 current_itr
= max(q_vector
->rx
.itr
, q_vector
->tx
.itr
);
3916 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3917 if (current_itr
== lowest_latency
&&
3918 ((q_vector
->rx
.ring
&& adapter
->rx_itr_setting
== 3) ||
3919 (!q_vector
->rx
.ring
&& adapter
->tx_itr_setting
== 3)))
3920 current_itr
= low_latency
;
3922 switch (current_itr
) {
3923 /* counts and packets in update_itr are dependent on these numbers */
3924 case lowest_latency
:
3925 new_itr
= IGB_70K_ITR
; /* 70,000 ints/sec */
3928 new_itr
= IGB_20K_ITR
; /* 20,000 ints/sec */
3931 new_itr
= IGB_4K_ITR
; /* 4,000 ints/sec */
3938 if (new_itr
!= q_vector
->itr_val
) {
3939 /* this attempts to bias the interrupt rate towards Bulk
3940 * by adding intermediate steps when interrupt rate is
3942 new_itr
= new_itr
> q_vector
->itr_val
?
3943 max((new_itr
* q_vector
->itr_val
) /
3944 (new_itr
+ (q_vector
->itr_val
>> 2)),
3947 /* Don't write the value here; it resets the adapter's
3948 * internal timer, and causes us to delay far longer than
3949 * we should between interrupts. Instead, we write the ITR
3950 * value at the beginning of the next interrupt so the timing
3951 * ends up being correct.
3953 q_vector
->itr_val
= new_itr
;
3954 q_vector
->set_itr
= 1;
3958 void igb_tx_ctxtdesc(struct igb_ring
*tx_ring
, u32 vlan_macip_lens
,
3959 u32 type_tucmd
, u32 mss_l4len_idx
)
3961 struct e1000_adv_tx_context_desc
*context_desc
;
3962 u16 i
= tx_ring
->next_to_use
;
3964 context_desc
= IGB_TX_CTXTDESC(tx_ring
, i
);
3967 tx_ring
->next_to_use
= (i
< tx_ring
->count
) ? i
: 0;
3969 /* set bits to identify this as an advanced context descriptor */
3970 type_tucmd
|= E1000_TXD_CMD_DEXT
| E1000_ADVTXD_DTYP_CTXT
;
3972 /* For 82575, context index must be unique per ring. */
3973 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX
, &tx_ring
->flags
))
3974 mss_l4len_idx
|= tx_ring
->reg_idx
<< 4;
3976 context_desc
->vlan_macip_lens
= cpu_to_le32(vlan_macip_lens
);
3977 context_desc
->seqnum_seed
= 0;
3978 context_desc
->type_tucmd_mlhl
= cpu_to_le32(type_tucmd
);
3979 context_desc
->mss_l4len_idx
= cpu_to_le32(mss_l4len_idx
);
3982 static int igb_tso(struct igb_ring
*tx_ring
,
3983 struct igb_tx_buffer
*first
,
3986 struct sk_buff
*skb
= first
->skb
;
3987 u32 vlan_macip_lens
, type_tucmd
;
3988 u32 mss_l4len_idx
, l4len
;
3990 if (!skb_is_gso(skb
))
3993 if (skb_header_cloned(skb
)) {
3994 int err
= pskb_expand_head(skb
, 0, 0, GFP_ATOMIC
);
3999 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4000 type_tucmd
= E1000_ADVTXD_TUCMD_L4T_TCP
;
4002 if (first
->protocol
== __constant_htons(ETH_P_IP
)) {
4003 struct iphdr
*iph
= ip_hdr(skb
);
4006 tcp_hdr(skb
)->check
= ~csum_tcpudp_magic(iph
->saddr
,
4010 type_tucmd
|= E1000_ADVTXD_TUCMD_IPV4
;
4011 first
->tx_flags
|= IGB_TX_FLAGS_TSO
|
4014 } else if (skb_is_gso_v6(skb
)) {
4015 ipv6_hdr(skb
)->payload_len
= 0;
4016 tcp_hdr(skb
)->check
= ~csum_ipv6_magic(&ipv6_hdr(skb
)->saddr
,
4017 &ipv6_hdr(skb
)->daddr
,
4019 first
->tx_flags
|= IGB_TX_FLAGS_TSO
|
4023 /* compute header lengths */
4024 l4len
= tcp_hdrlen(skb
);
4025 *hdr_len
= skb_transport_offset(skb
) + l4len
;
4027 /* update gso size and bytecount with header size */
4028 first
->gso_segs
= skb_shinfo(skb
)->gso_segs
;
4029 first
->bytecount
+= (first
->gso_segs
- 1) * *hdr_len
;
4032 mss_l4len_idx
= l4len
<< E1000_ADVTXD_L4LEN_SHIFT
;
4033 mss_l4len_idx
|= skb_shinfo(skb
)->gso_size
<< E1000_ADVTXD_MSS_SHIFT
;
4035 /* VLAN MACLEN IPLEN */
4036 vlan_macip_lens
= skb_network_header_len(skb
);
4037 vlan_macip_lens
|= skb_network_offset(skb
) << E1000_ADVTXD_MACLEN_SHIFT
;
4038 vlan_macip_lens
|= first
->tx_flags
& IGB_TX_FLAGS_VLAN_MASK
;
4040 igb_tx_ctxtdesc(tx_ring
, vlan_macip_lens
, type_tucmd
, mss_l4len_idx
);
4045 static void igb_tx_csum(struct igb_ring
*tx_ring
, struct igb_tx_buffer
*first
)
4047 struct sk_buff
*skb
= first
->skb
;
4048 u32 vlan_macip_lens
= 0;
4049 u32 mss_l4len_idx
= 0;
4052 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
4053 if (!(first
->tx_flags
& IGB_TX_FLAGS_VLAN
))
4057 switch (first
->protocol
) {
4058 case __constant_htons(ETH_P_IP
):
4059 vlan_macip_lens
|= skb_network_header_len(skb
);
4060 type_tucmd
|= E1000_ADVTXD_TUCMD_IPV4
;
4061 l4_hdr
= ip_hdr(skb
)->protocol
;
4063 case __constant_htons(ETH_P_IPV6
):
4064 vlan_macip_lens
|= skb_network_header_len(skb
);
4065 l4_hdr
= ipv6_hdr(skb
)->nexthdr
;
4068 if (unlikely(net_ratelimit())) {
4069 dev_warn(tx_ring
->dev
,
4070 "partial checksum but proto=%x!\n",
4078 type_tucmd
|= E1000_ADVTXD_TUCMD_L4T_TCP
;
4079 mss_l4len_idx
= tcp_hdrlen(skb
) <<
4080 E1000_ADVTXD_L4LEN_SHIFT
;
4083 type_tucmd
|= E1000_ADVTXD_TUCMD_L4T_SCTP
;
4084 mss_l4len_idx
= sizeof(struct sctphdr
) <<
4085 E1000_ADVTXD_L4LEN_SHIFT
;
4088 mss_l4len_idx
= sizeof(struct udphdr
) <<
4089 E1000_ADVTXD_L4LEN_SHIFT
;
4092 if (unlikely(net_ratelimit())) {
4093 dev_warn(tx_ring
->dev
,
4094 "partial checksum but l4 proto=%x!\n",
4100 /* update TX checksum flag */
4101 first
->tx_flags
|= IGB_TX_FLAGS_CSUM
;
4104 vlan_macip_lens
|= skb_network_offset(skb
) << E1000_ADVTXD_MACLEN_SHIFT
;
4105 vlan_macip_lens
|= first
->tx_flags
& IGB_TX_FLAGS_VLAN_MASK
;
4107 igb_tx_ctxtdesc(tx_ring
, vlan_macip_lens
, type_tucmd
, mss_l4len_idx
);
4110 static __le32
igb_tx_cmd_type(u32 tx_flags
)
4112 /* set type for advanced descriptor with frame checksum insertion */
4113 __le32 cmd_type
= cpu_to_le32(E1000_ADVTXD_DTYP_DATA
|
4114 E1000_ADVTXD_DCMD_IFCS
|
4115 E1000_ADVTXD_DCMD_DEXT
);
4117 /* set HW vlan bit if vlan is present */
4118 if (tx_flags
& IGB_TX_FLAGS_VLAN
)
4119 cmd_type
|= cpu_to_le32(E1000_ADVTXD_DCMD_VLE
);
4121 /* set timestamp bit if present */
4122 if (tx_flags
& IGB_TX_FLAGS_TSTAMP
)
4123 cmd_type
|= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP
);
4125 /* set segmentation bits for TSO */
4126 if (tx_flags
& IGB_TX_FLAGS_TSO
)
4127 cmd_type
|= cpu_to_le32(E1000_ADVTXD_DCMD_TSE
);
4132 static void igb_tx_olinfo_status(struct igb_ring
*tx_ring
,
4133 union e1000_adv_tx_desc
*tx_desc
,
4134 u32 tx_flags
, unsigned int paylen
)
4136 u32 olinfo_status
= paylen
<< E1000_ADVTXD_PAYLEN_SHIFT
;
4138 /* 82575 requires a unique index per ring if any offload is enabled */
4139 if ((tx_flags
& (IGB_TX_FLAGS_CSUM
| IGB_TX_FLAGS_VLAN
)) &&
4140 test_bit(IGB_RING_FLAG_TX_CTX_IDX
, &tx_ring
->flags
))
4141 olinfo_status
|= tx_ring
->reg_idx
<< 4;
4143 /* insert L4 checksum */
4144 if (tx_flags
& IGB_TX_FLAGS_CSUM
) {
4145 olinfo_status
|= E1000_TXD_POPTS_TXSM
<< 8;
4147 /* insert IPv4 checksum */
4148 if (tx_flags
& IGB_TX_FLAGS_IPV4
)
4149 olinfo_status
|= E1000_TXD_POPTS_IXSM
<< 8;
4152 tx_desc
->read
.olinfo_status
= cpu_to_le32(olinfo_status
);
4156 * The largest size we can write to the descriptor is 65535. In order to
4157 * maintain a power of two alignment we have to limit ourselves to 32K.
4159 #define IGB_MAX_TXD_PWR 15
4160 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4162 static void igb_tx_map(struct igb_ring
*tx_ring
,
4163 struct igb_tx_buffer
*first
,
4166 struct sk_buff
*skb
= first
->skb
;
4167 struct igb_tx_buffer
*tx_buffer_info
;
4168 union e1000_adv_tx_desc
*tx_desc
;
4170 struct skb_frag_struct
*frag
= &skb_shinfo(skb
)->frags
[0];
4171 unsigned int data_len
= skb
->data_len
;
4172 unsigned int size
= skb_headlen(skb
);
4173 unsigned int paylen
= skb
->len
- hdr_len
;
4175 u32 tx_flags
= first
->tx_flags
;
4176 u16 i
= tx_ring
->next_to_use
;
4178 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
4180 igb_tx_olinfo_status(tx_ring
, tx_desc
, tx_flags
, paylen
);
4181 cmd_type
= igb_tx_cmd_type(tx_flags
);
4183 dma
= dma_map_single(tx_ring
->dev
, skb
->data
, size
, DMA_TO_DEVICE
);
4184 if (dma_mapping_error(tx_ring
->dev
, dma
))
4187 /* record length, and DMA address */
4188 first
->length
= size
;
4190 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4193 while (unlikely(size
> IGB_MAX_DATA_PER_TXD
)) {
4194 tx_desc
->read
.cmd_type_len
=
4195 cmd_type
| cpu_to_le32(IGB_MAX_DATA_PER_TXD
);
4199 if (i
== tx_ring
->count
) {
4200 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
4204 dma
+= IGB_MAX_DATA_PER_TXD
;
4205 size
-= IGB_MAX_DATA_PER_TXD
;
4207 tx_desc
->read
.olinfo_status
= 0;
4208 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4211 if (likely(!data_len
))
4214 tx_desc
->read
.cmd_type_len
= cmd_type
| cpu_to_le32(size
);
4218 if (i
== tx_ring
->count
) {
4219 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
4223 size
= skb_frag_size(frag
);
4226 dma
= skb_frag_dma_map(tx_ring
->dev
, frag
, 0,
4227 size
, DMA_TO_DEVICE
);
4228 if (dma_mapping_error(tx_ring
->dev
, dma
))
4231 tx_buffer_info
= &tx_ring
->tx_buffer_info
[i
];
4232 tx_buffer_info
->length
= size
;
4233 tx_buffer_info
->dma
= dma
;
4235 tx_desc
->read
.olinfo_status
= 0;
4236 tx_desc
->read
.buffer_addr
= cpu_to_le64(dma
);
4241 /* write last descriptor with RS and EOP bits */
4242 cmd_type
|= cpu_to_le32(size
) | cpu_to_le32(IGB_TXD_DCMD
);
4243 tx_desc
->read
.cmd_type_len
= cmd_type
;
4245 /* set the timestamp */
4246 first
->time_stamp
= jiffies
;
4249 * Force memory writes to complete before letting h/w know there
4250 * are new descriptors to fetch. (Only applicable for weak-ordered
4251 * memory model archs, such as IA-64).
4253 * We also need this memory barrier to make certain all of the
4254 * status bits have been updated before next_to_watch is written.
4258 /* set next_to_watch value indicating a packet is present */
4259 first
->next_to_watch
= tx_desc
;
4262 if (i
== tx_ring
->count
)
4265 tx_ring
->next_to_use
= i
;
4267 writel(i
, tx_ring
->tail
);
4269 /* we need this if more than one processor can write to our tail
4270 * at a time, it syncronizes IO on IA64/Altix systems */
4276 dev_err(tx_ring
->dev
, "TX DMA map failed\n");
4278 /* clear dma mappings for failed tx_buffer_info map */
4280 tx_buffer_info
= &tx_ring
->tx_buffer_info
[i
];
4281 igb_unmap_and_free_tx_resource(tx_ring
, tx_buffer_info
);
4282 if (tx_buffer_info
== first
)
4289 tx_ring
->next_to_use
= i
;
4292 static int __igb_maybe_stop_tx(struct igb_ring
*tx_ring
, const u16 size
)
4294 struct net_device
*netdev
= tx_ring
->netdev
;
4296 netif_stop_subqueue(netdev
, tx_ring
->queue_index
);
4298 /* Herbert's original patch had:
4299 * smp_mb__after_netif_stop_queue();
4300 * but since that doesn't exist yet, just open code it. */
4303 /* We need to check again in a case another CPU has just
4304 * made room available. */
4305 if (igb_desc_unused(tx_ring
) < size
)
4309 netif_wake_subqueue(netdev
, tx_ring
->queue_index
);
4311 u64_stats_update_begin(&tx_ring
->tx_syncp2
);
4312 tx_ring
->tx_stats
.restart_queue2
++;
4313 u64_stats_update_end(&tx_ring
->tx_syncp2
);
4318 static inline int igb_maybe_stop_tx(struct igb_ring
*tx_ring
, const u16 size
)
4320 if (igb_desc_unused(tx_ring
) >= size
)
4322 return __igb_maybe_stop_tx(tx_ring
, size
);
4325 netdev_tx_t
igb_xmit_frame_ring(struct sk_buff
*skb
,
4326 struct igb_ring
*tx_ring
)
4328 struct igb_tx_buffer
*first
;
4331 __be16 protocol
= vlan_get_protocol(skb
);
4334 /* need: 1 descriptor per page,
4335 * + 2 desc gap to keep tail from touching head,
4336 * + 1 desc for skb->data,
4337 * + 1 desc for context descriptor,
4338 * otherwise try next time */
4339 if (igb_maybe_stop_tx(tx_ring
, skb_shinfo(skb
)->nr_frags
+ 4)) {
4340 /* this is a hard error */
4341 return NETDEV_TX_BUSY
;
4344 /* record the location of the first descriptor for this packet */
4345 first
= &tx_ring
->tx_buffer_info
[tx_ring
->next_to_use
];
4347 first
->bytecount
= skb
->len
;
4348 first
->gso_segs
= 1;
4350 if (unlikely(skb_shinfo(skb
)->tx_flags
& SKBTX_HW_TSTAMP
)) {
4351 skb_shinfo(skb
)->tx_flags
|= SKBTX_IN_PROGRESS
;
4352 tx_flags
|= IGB_TX_FLAGS_TSTAMP
;
4355 if (vlan_tx_tag_present(skb
)) {
4356 tx_flags
|= IGB_TX_FLAGS_VLAN
;
4357 tx_flags
|= (vlan_tx_tag_get(skb
) << IGB_TX_FLAGS_VLAN_SHIFT
);
4360 /* record initial flags and protocol */
4361 first
->tx_flags
= tx_flags
;
4362 first
->protocol
= protocol
;
4364 tso
= igb_tso(tx_ring
, first
, &hdr_len
);
4368 igb_tx_csum(tx_ring
, first
);
4370 igb_tx_map(tx_ring
, first
, hdr_len
);
4372 /* Make sure there is space in the ring for the next send. */
4373 igb_maybe_stop_tx(tx_ring
, MAX_SKB_FRAGS
+ 4);
4375 return NETDEV_TX_OK
;
4378 igb_unmap_and_free_tx_resource(tx_ring
, first
);
4380 return NETDEV_TX_OK
;
4383 static inline struct igb_ring
*igb_tx_queue_mapping(struct igb_adapter
*adapter
,
4384 struct sk_buff
*skb
)
4386 unsigned int r_idx
= skb
->queue_mapping
;
4388 if (r_idx
>= adapter
->num_tx_queues
)
4389 r_idx
= r_idx
% adapter
->num_tx_queues
;
4391 return adapter
->tx_ring
[r_idx
];
4394 static netdev_tx_t
igb_xmit_frame(struct sk_buff
*skb
,
4395 struct net_device
*netdev
)
4397 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4399 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
4400 dev_kfree_skb_any(skb
);
4401 return NETDEV_TX_OK
;
4404 if (skb
->len
<= 0) {
4405 dev_kfree_skb_any(skb
);
4406 return NETDEV_TX_OK
;
4410 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4411 * in order to meet this minimum size requirement.
4413 if (skb
->len
< 17) {
4414 if (skb_padto(skb
, 17))
4415 return NETDEV_TX_OK
;
4419 return igb_xmit_frame_ring(skb
, igb_tx_queue_mapping(adapter
, skb
));
4423 * igb_tx_timeout - Respond to a Tx Hang
4424 * @netdev: network interface device structure
4426 static void igb_tx_timeout(struct net_device
*netdev
)
4428 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4429 struct e1000_hw
*hw
= &adapter
->hw
;
4431 /* Do the reset outside of interrupt context */
4432 adapter
->tx_timeout_count
++;
4434 if (hw
->mac
.type
>= e1000_82580
)
4435 hw
->dev_spec
._82575
.global_device_reset
= true;
4437 schedule_work(&adapter
->reset_task
);
4439 (adapter
->eims_enable_mask
& ~adapter
->eims_other
));
4442 static void igb_reset_task(struct work_struct
*work
)
4444 struct igb_adapter
*adapter
;
4445 adapter
= container_of(work
, struct igb_adapter
, reset_task
);
4448 netdev_err(adapter
->netdev
, "Reset adapter\n");
4449 igb_reinit_locked(adapter
);
4453 * igb_get_stats64 - Get System Network Statistics
4454 * @netdev: network interface device structure
4455 * @stats: rtnl_link_stats64 pointer
4458 static struct rtnl_link_stats64
*igb_get_stats64(struct net_device
*netdev
,
4459 struct rtnl_link_stats64
*stats
)
4461 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4463 spin_lock(&adapter
->stats64_lock
);
4464 igb_update_stats(adapter
, &adapter
->stats64
);
4465 memcpy(stats
, &adapter
->stats64
, sizeof(*stats
));
4466 spin_unlock(&adapter
->stats64_lock
);
4472 * igb_change_mtu - Change the Maximum Transfer Unit
4473 * @netdev: network interface device structure
4474 * @new_mtu: new value for maximum frame size
4476 * Returns 0 on success, negative on failure
4478 static int igb_change_mtu(struct net_device
*netdev
, int new_mtu
)
4480 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4481 struct pci_dev
*pdev
= adapter
->pdev
;
4482 int max_frame
= new_mtu
+ ETH_HLEN
+ ETH_FCS_LEN
+ VLAN_HLEN
;
4484 if ((new_mtu
< 68) || (max_frame
> MAX_JUMBO_FRAME_SIZE
)) {
4485 dev_err(&pdev
->dev
, "Invalid MTU setting\n");
4489 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4490 if (max_frame
> MAX_STD_JUMBO_FRAME_SIZE
) {
4491 dev_err(&pdev
->dev
, "MTU > 9216 not supported.\n");
4495 while (test_and_set_bit(__IGB_RESETTING
, &adapter
->state
))
4498 /* igb_down has a dependency on max_frame_size */
4499 adapter
->max_frame_size
= max_frame
;
4501 if (netif_running(netdev
))
4504 dev_info(&pdev
->dev
, "changing MTU from %d to %d\n",
4505 netdev
->mtu
, new_mtu
);
4506 netdev
->mtu
= new_mtu
;
4508 if (netif_running(netdev
))
4513 clear_bit(__IGB_RESETTING
, &adapter
->state
);
4519 * igb_update_stats - Update the board statistics counters
4520 * @adapter: board private structure
4523 void igb_update_stats(struct igb_adapter
*adapter
,
4524 struct rtnl_link_stats64
*net_stats
)
4526 struct e1000_hw
*hw
= &adapter
->hw
;
4527 struct pci_dev
*pdev
= adapter
->pdev
;
4533 u64 _bytes
, _packets
;
4535 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4538 * Prevent stats update while adapter is being reset, or if the pci
4539 * connection is down.
4541 if (adapter
->link_speed
== 0)
4543 if (pci_channel_offline(pdev
))
4548 for (i
= 0; i
< adapter
->num_rx_queues
; i
++) {
4549 u32 rqdpc_tmp
= rd32(E1000_RQDPC(i
)) & 0x0FFF;
4550 struct igb_ring
*ring
= adapter
->rx_ring
[i
];
4552 ring
->rx_stats
.drops
+= rqdpc_tmp
;
4553 net_stats
->rx_fifo_errors
+= rqdpc_tmp
;
4556 start
= u64_stats_fetch_begin_bh(&ring
->rx_syncp
);
4557 _bytes
= ring
->rx_stats
.bytes
;
4558 _packets
= ring
->rx_stats
.packets
;
4559 } while (u64_stats_fetch_retry_bh(&ring
->rx_syncp
, start
));
4561 packets
+= _packets
;
4564 net_stats
->rx_bytes
= bytes
;
4565 net_stats
->rx_packets
= packets
;
4569 for (i
= 0; i
< adapter
->num_tx_queues
; i
++) {
4570 struct igb_ring
*ring
= adapter
->tx_ring
[i
];
4572 start
= u64_stats_fetch_begin_bh(&ring
->tx_syncp
);
4573 _bytes
= ring
->tx_stats
.bytes
;
4574 _packets
= ring
->tx_stats
.packets
;
4575 } while (u64_stats_fetch_retry_bh(&ring
->tx_syncp
, start
));
4577 packets
+= _packets
;
4579 net_stats
->tx_bytes
= bytes
;
4580 net_stats
->tx_packets
= packets
;
4582 /* read stats registers */
4583 adapter
->stats
.crcerrs
+= rd32(E1000_CRCERRS
);
4584 adapter
->stats
.gprc
+= rd32(E1000_GPRC
);
4585 adapter
->stats
.gorc
+= rd32(E1000_GORCL
);
4586 rd32(E1000_GORCH
); /* clear GORCL */
4587 adapter
->stats
.bprc
+= rd32(E1000_BPRC
);
4588 adapter
->stats
.mprc
+= rd32(E1000_MPRC
);
4589 adapter
->stats
.roc
+= rd32(E1000_ROC
);
4591 adapter
->stats
.prc64
+= rd32(E1000_PRC64
);
4592 adapter
->stats
.prc127
+= rd32(E1000_PRC127
);
4593 adapter
->stats
.prc255
+= rd32(E1000_PRC255
);
4594 adapter
->stats
.prc511
+= rd32(E1000_PRC511
);
4595 adapter
->stats
.prc1023
+= rd32(E1000_PRC1023
);
4596 adapter
->stats
.prc1522
+= rd32(E1000_PRC1522
);
4597 adapter
->stats
.symerrs
+= rd32(E1000_SYMERRS
);
4598 adapter
->stats
.sec
+= rd32(E1000_SEC
);
4600 mpc
= rd32(E1000_MPC
);
4601 adapter
->stats
.mpc
+= mpc
;
4602 net_stats
->rx_fifo_errors
+= mpc
;
4603 adapter
->stats
.scc
+= rd32(E1000_SCC
);
4604 adapter
->stats
.ecol
+= rd32(E1000_ECOL
);
4605 adapter
->stats
.mcc
+= rd32(E1000_MCC
);
4606 adapter
->stats
.latecol
+= rd32(E1000_LATECOL
);
4607 adapter
->stats
.dc
+= rd32(E1000_DC
);
4608 adapter
->stats
.rlec
+= rd32(E1000_RLEC
);
4609 adapter
->stats
.xonrxc
+= rd32(E1000_XONRXC
);
4610 adapter
->stats
.xontxc
+= rd32(E1000_XONTXC
);
4611 adapter
->stats
.xoffrxc
+= rd32(E1000_XOFFRXC
);
4612 adapter
->stats
.xofftxc
+= rd32(E1000_XOFFTXC
);
4613 adapter
->stats
.fcruc
+= rd32(E1000_FCRUC
);
4614 adapter
->stats
.gptc
+= rd32(E1000_GPTC
);
4615 adapter
->stats
.gotc
+= rd32(E1000_GOTCL
);
4616 rd32(E1000_GOTCH
); /* clear GOTCL */
4617 adapter
->stats
.rnbc
+= rd32(E1000_RNBC
);
4618 adapter
->stats
.ruc
+= rd32(E1000_RUC
);
4619 adapter
->stats
.rfc
+= rd32(E1000_RFC
);
4620 adapter
->stats
.rjc
+= rd32(E1000_RJC
);
4621 adapter
->stats
.tor
+= rd32(E1000_TORH
);
4622 adapter
->stats
.tot
+= rd32(E1000_TOTH
);
4623 adapter
->stats
.tpr
+= rd32(E1000_TPR
);
4625 adapter
->stats
.ptc64
+= rd32(E1000_PTC64
);
4626 adapter
->stats
.ptc127
+= rd32(E1000_PTC127
);
4627 adapter
->stats
.ptc255
+= rd32(E1000_PTC255
);
4628 adapter
->stats
.ptc511
+= rd32(E1000_PTC511
);
4629 adapter
->stats
.ptc1023
+= rd32(E1000_PTC1023
);
4630 adapter
->stats
.ptc1522
+= rd32(E1000_PTC1522
);
4632 adapter
->stats
.mptc
+= rd32(E1000_MPTC
);
4633 adapter
->stats
.bptc
+= rd32(E1000_BPTC
);
4635 adapter
->stats
.tpt
+= rd32(E1000_TPT
);
4636 adapter
->stats
.colc
+= rd32(E1000_COLC
);
4638 adapter
->stats
.algnerrc
+= rd32(E1000_ALGNERRC
);
4639 /* read internal phy specific stats */
4640 reg
= rd32(E1000_CTRL_EXT
);
4641 if (!(reg
& E1000_CTRL_EXT_LINK_MODE_MASK
)) {
4642 adapter
->stats
.rxerrc
+= rd32(E1000_RXERRC
);
4643 adapter
->stats
.tncrs
+= rd32(E1000_TNCRS
);
4646 adapter
->stats
.tsctc
+= rd32(E1000_TSCTC
);
4647 adapter
->stats
.tsctfc
+= rd32(E1000_TSCTFC
);
4649 adapter
->stats
.iac
+= rd32(E1000_IAC
);
4650 adapter
->stats
.icrxoc
+= rd32(E1000_ICRXOC
);
4651 adapter
->stats
.icrxptc
+= rd32(E1000_ICRXPTC
);
4652 adapter
->stats
.icrxatc
+= rd32(E1000_ICRXATC
);
4653 adapter
->stats
.ictxptc
+= rd32(E1000_ICTXPTC
);
4654 adapter
->stats
.ictxatc
+= rd32(E1000_ICTXATC
);
4655 adapter
->stats
.ictxqec
+= rd32(E1000_ICTXQEC
);
4656 adapter
->stats
.ictxqmtc
+= rd32(E1000_ICTXQMTC
);
4657 adapter
->stats
.icrxdmtc
+= rd32(E1000_ICRXDMTC
);
4659 /* Fill out the OS statistics structure */
4660 net_stats
->multicast
= adapter
->stats
.mprc
;
4661 net_stats
->collisions
= adapter
->stats
.colc
;
4665 /* RLEC on some newer hardware can be incorrect so build
4666 * our own version based on RUC and ROC */
4667 net_stats
->rx_errors
= adapter
->stats
.rxerrc
+
4668 adapter
->stats
.crcerrs
+ adapter
->stats
.algnerrc
+
4669 adapter
->stats
.ruc
+ adapter
->stats
.roc
+
4670 adapter
->stats
.cexterr
;
4671 net_stats
->rx_length_errors
= adapter
->stats
.ruc
+
4673 net_stats
->rx_crc_errors
= adapter
->stats
.crcerrs
;
4674 net_stats
->rx_frame_errors
= adapter
->stats
.algnerrc
;
4675 net_stats
->rx_missed_errors
= adapter
->stats
.mpc
;
4678 net_stats
->tx_errors
= adapter
->stats
.ecol
+
4679 adapter
->stats
.latecol
;
4680 net_stats
->tx_aborted_errors
= adapter
->stats
.ecol
;
4681 net_stats
->tx_window_errors
= adapter
->stats
.latecol
;
4682 net_stats
->tx_carrier_errors
= adapter
->stats
.tncrs
;
4684 /* Tx Dropped needs to be maintained elsewhere */
4687 if (hw
->phy
.media_type
== e1000_media_type_copper
) {
4688 if ((adapter
->link_speed
== SPEED_1000
) &&
4689 (!igb_read_phy_reg(hw
, PHY_1000T_STATUS
, &phy_tmp
))) {
4690 phy_tmp
&= PHY_IDLE_ERROR_COUNT_MASK
;
4691 adapter
->phy_stats
.idle_errors
+= phy_tmp
;
4695 /* Management Stats */
4696 adapter
->stats
.mgptc
+= rd32(E1000_MGTPTC
);
4697 adapter
->stats
.mgprc
+= rd32(E1000_MGTPRC
);
4698 adapter
->stats
.mgpdc
+= rd32(E1000_MGTPDC
);
4701 reg
= rd32(E1000_MANC
);
4702 if (reg
& E1000_MANC_EN_BMC2OS
) {
4703 adapter
->stats
.o2bgptc
+= rd32(E1000_O2BGPTC
);
4704 adapter
->stats
.o2bspc
+= rd32(E1000_O2BSPC
);
4705 adapter
->stats
.b2ospc
+= rd32(E1000_B2OSPC
);
4706 adapter
->stats
.b2ogprc
+= rd32(E1000_B2OGPRC
);
4710 static irqreturn_t
igb_msix_other(int irq
, void *data
)
4712 struct igb_adapter
*adapter
= data
;
4713 struct e1000_hw
*hw
= &adapter
->hw
;
4714 u32 icr
= rd32(E1000_ICR
);
4715 /* reading ICR causes bit 31 of EICR to be cleared */
4717 if (icr
& E1000_ICR_DRSTA
)
4718 schedule_work(&adapter
->reset_task
);
4720 if (icr
& E1000_ICR_DOUTSYNC
) {
4721 /* HW is reporting DMA is out of sync */
4722 adapter
->stats
.doosync
++;
4723 /* The DMA Out of Sync is also indication of a spoof event
4724 * in IOV mode. Check the Wrong VM Behavior register to
4725 * see if it is really a spoof event. */
4726 igb_check_wvbr(adapter
);
4729 /* Check for a mailbox event */
4730 if (icr
& E1000_ICR_VMMB
)
4731 igb_msg_task(adapter
);
4733 if (icr
& E1000_ICR_LSC
) {
4734 hw
->mac
.get_link_status
= 1;
4735 /* guard against interrupt when we're going down */
4736 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
4737 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
4740 wr32(E1000_EIMS
, adapter
->eims_other
);
4745 static void igb_write_itr(struct igb_q_vector
*q_vector
)
4747 struct igb_adapter
*adapter
= q_vector
->adapter
;
4748 u32 itr_val
= q_vector
->itr_val
& 0x7FFC;
4750 if (!q_vector
->set_itr
)
4756 if (adapter
->hw
.mac
.type
== e1000_82575
)
4757 itr_val
|= itr_val
<< 16;
4759 itr_val
|= E1000_EITR_CNT_IGNR
;
4761 writel(itr_val
, q_vector
->itr_register
);
4762 q_vector
->set_itr
= 0;
4765 static irqreturn_t
igb_msix_ring(int irq
, void *data
)
4767 struct igb_q_vector
*q_vector
= data
;
4769 /* Write the ITR value calculated from the previous interrupt. */
4770 igb_write_itr(q_vector
);
4772 napi_schedule(&q_vector
->napi
);
4777 #ifdef CONFIG_IGB_DCA
4778 static void igb_update_dca(struct igb_q_vector
*q_vector
)
4780 struct igb_adapter
*adapter
= q_vector
->adapter
;
4781 struct e1000_hw
*hw
= &adapter
->hw
;
4782 int cpu
= get_cpu();
4784 if (q_vector
->cpu
== cpu
)
4787 if (q_vector
->tx
.ring
) {
4788 int q
= q_vector
->tx
.ring
->reg_idx
;
4789 u32 dca_txctrl
= rd32(E1000_DCA_TXCTRL(q
));
4790 if (hw
->mac
.type
== e1000_82575
) {
4791 dca_txctrl
&= ~E1000_DCA_TXCTRL_CPUID_MASK
;
4792 dca_txctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
);
4794 dca_txctrl
&= ~E1000_DCA_TXCTRL_CPUID_MASK_82576
;
4795 dca_txctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
) <<
4796 E1000_DCA_TXCTRL_CPUID_SHIFT
;
4798 dca_txctrl
|= E1000_DCA_TXCTRL_DESC_DCA_EN
;
4799 wr32(E1000_DCA_TXCTRL(q
), dca_txctrl
);
4801 if (q_vector
->rx
.ring
) {
4802 int q
= q_vector
->rx
.ring
->reg_idx
;
4803 u32 dca_rxctrl
= rd32(E1000_DCA_RXCTRL(q
));
4804 if (hw
->mac
.type
== e1000_82575
) {
4805 dca_rxctrl
&= ~E1000_DCA_RXCTRL_CPUID_MASK
;
4806 dca_rxctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
);
4808 dca_rxctrl
&= ~E1000_DCA_RXCTRL_CPUID_MASK_82576
;
4809 dca_rxctrl
|= dca3_get_tag(&adapter
->pdev
->dev
, cpu
) <<
4810 E1000_DCA_RXCTRL_CPUID_SHIFT
;
4812 dca_rxctrl
|= E1000_DCA_RXCTRL_DESC_DCA_EN
;
4813 dca_rxctrl
|= E1000_DCA_RXCTRL_HEAD_DCA_EN
;
4814 dca_rxctrl
|= E1000_DCA_RXCTRL_DATA_DCA_EN
;
4815 wr32(E1000_DCA_RXCTRL(q
), dca_rxctrl
);
4817 q_vector
->cpu
= cpu
;
4822 static void igb_setup_dca(struct igb_adapter
*adapter
)
4824 struct e1000_hw
*hw
= &adapter
->hw
;
4827 if (!(adapter
->flags
& IGB_FLAG_DCA_ENABLED
))
4830 /* Always use CB2 mode, difference is masked in the CB driver. */
4831 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_CB2
);
4833 for (i
= 0; i
< adapter
->num_q_vectors
; i
++) {
4834 adapter
->q_vector
[i
]->cpu
= -1;
4835 igb_update_dca(adapter
->q_vector
[i
]);
4839 static int __igb_notify_dca(struct device
*dev
, void *data
)
4841 struct net_device
*netdev
= dev_get_drvdata(dev
);
4842 struct igb_adapter
*adapter
= netdev_priv(netdev
);
4843 struct pci_dev
*pdev
= adapter
->pdev
;
4844 struct e1000_hw
*hw
= &adapter
->hw
;
4845 unsigned long event
= *(unsigned long *)data
;
4848 case DCA_PROVIDER_ADD
:
4849 /* if already enabled, don't do it again */
4850 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
)
4852 if (dca_add_requester(dev
) == 0) {
4853 adapter
->flags
|= IGB_FLAG_DCA_ENABLED
;
4854 dev_info(&pdev
->dev
, "DCA enabled\n");
4855 igb_setup_dca(adapter
);
4858 /* Fall Through since DCA is disabled. */
4859 case DCA_PROVIDER_REMOVE
:
4860 if (adapter
->flags
& IGB_FLAG_DCA_ENABLED
) {
4861 /* without this a class_device is left
4862 * hanging around in the sysfs model */
4863 dca_remove_requester(dev
);
4864 dev_info(&pdev
->dev
, "DCA disabled\n");
4865 adapter
->flags
&= ~IGB_FLAG_DCA_ENABLED
;
4866 wr32(E1000_DCA_CTRL
, E1000_DCA_CTRL_DCA_MODE_DISABLE
);
4874 static int igb_notify_dca(struct notifier_block
*nb
, unsigned long event
,
4879 ret_val
= driver_for_each_device(&igb_driver
.driver
, NULL
, &event
,
4882 return ret_val
? NOTIFY_BAD
: NOTIFY_DONE
;
4884 #endif /* CONFIG_IGB_DCA */
4886 #ifdef CONFIG_PCI_IOV
4887 static int igb_vf_configure(struct igb_adapter
*adapter
, int vf
)
4889 unsigned char mac_addr
[ETH_ALEN
];
4890 struct pci_dev
*pdev
= adapter
->pdev
;
4891 struct e1000_hw
*hw
= &adapter
->hw
;
4892 struct pci_dev
*pvfdev
;
4893 unsigned int device_id
;
4896 random_ether_addr(mac_addr
);
4897 igb_set_vf_mac(adapter
, vf
, mac_addr
);
4899 switch (adapter
->hw
.mac
.type
) {
4901 device_id
= IGB_82576_VF_DEV_ID
;
4902 /* VF Stride for 82576 is 2 */
4903 thisvf_devfn
= (pdev
->devfn
+ 0x80 + (vf
<< 1)) |
4907 device_id
= IGB_I350_VF_DEV_ID
;
4908 /* VF Stride for I350 is 4 */
4909 thisvf_devfn
= (pdev
->devfn
+ 0x80 + (vf
<< 2)) |
4918 pvfdev
= pci_get_device(hw
->vendor_id
, device_id
, NULL
);
4920 if (pvfdev
->devfn
== thisvf_devfn
)
4922 pvfdev
= pci_get_device(hw
->vendor_id
,
4927 adapter
->vf_data
[vf
].vfdev
= pvfdev
;
4930 "Couldn't find pci dev ptr for VF %4.4x\n",
4932 return pvfdev
!= NULL
;
4935 static int igb_find_enabled_vfs(struct igb_adapter
*adapter
)
4937 struct e1000_hw
*hw
= &adapter
->hw
;
4938 struct pci_dev
*pdev
= adapter
->pdev
;
4939 struct pci_dev
*pvfdev
;
4942 unsigned int device_id
;
4945 switch (adapter
->hw
.mac
.type
) {
4947 device_id
= IGB_82576_VF_DEV_ID
;
4948 /* VF Stride for 82576 is 2 */
4952 device_id
= IGB_I350_VF_DEV_ID
;
4953 /* VF Stride for I350 is 4 */
4962 vf_devfn
= pdev
->devfn
+ 0x80;
4963 pvfdev
= pci_get_device(hw
->vendor_id
, device_id
, NULL
);
4965 if (pvfdev
->devfn
== vf_devfn
)
4967 vf_devfn
+= vf_stride
;
4968 pvfdev
= pci_get_device(hw
->vendor_id
,
4975 static int igb_check_vf_assignment(struct igb_adapter
*adapter
)
4978 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
4979 if (adapter
->vf_data
[i
].vfdev
) {
4980 if (adapter
->vf_data
[i
].vfdev
->dev_flags
&
4981 PCI_DEV_FLAGS_ASSIGNED
)
4989 static void igb_ping_all_vfs(struct igb_adapter
*adapter
)
4991 struct e1000_hw
*hw
= &adapter
->hw
;
4995 for (i
= 0 ; i
< adapter
->vfs_allocated_count
; i
++) {
4996 ping
= E1000_PF_CONTROL_MSG
;
4997 if (adapter
->vf_data
[i
].flags
& IGB_VF_FLAG_CTS
)
4998 ping
|= E1000_VT_MSGTYPE_CTS
;
4999 igb_write_mbx(hw
, &ping
, 1, i
);
5003 static int igb_set_vf_promisc(struct igb_adapter
*adapter
, u32
*msgbuf
, u32 vf
)
5005 struct e1000_hw
*hw
= &adapter
->hw
;
5006 u32 vmolr
= rd32(E1000_VMOLR(vf
));
5007 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5009 vf_data
->flags
&= ~(IGB_VF_FLAG_UNI_PROMISC
|
5010 IGB_VF_FLAG_MULTI_PROMISC
);
5011 vmolr
&= ~(E1000_VMOLR_ROPE
| E1000_VMOLR_ROMPE
| E1000_VMOLR_MPME
);
5013 if (*msgbuf
& E1000_VF_SET_PROMISC_MULTICAST
) {
5014 vmolr
|= E1000_VMOLR_MPME
;
5015 vf_data
->flags
|= IGB_VF_FLAG_MULTI_PROMISC
;
5016 *msgbuf
&= ~E1000_VF_SET_PROMISC_MULTICAST
;
5019 * if we have hashes and we are clearing a multicast promisc
5020 * flag we need to write the hashes to the MTA as this step
5021 * was previously skipped
5023 if (vf_data
->num_vf_mc_hashes
> 30) {
5024 vmolr
|= E1000_VMOLR_MPME
;
5025 } else if (vf_data
->num_vf_mc_hashes
) {
5027 vmolr
|= E1000_VMOLR_ROMPE
;
5028 for (j
= 0; j
< vf_data
->num_vf_mc_hashes
; j
++)
5029 igb_mta_set(hw
, vf_data
->vf_mc_hashes
[j
]);
5033 wr32(E1000_VMOLR(vf
), vmolr
);
5035 /* there are flags left unprocessed, likely not supported */
5036 if (*msgbuf
& E1000_VT_MSGINFO_MASK
)
5043 static int igb_set_vf_multicasts(struct igb_adapter
*adapter
,
5044 u32
*msgbuf
, u32 vf
)
5046 int n
= (msgbuf
[0] & E1000_VT_MSGINFO_MASK
) >> E1000_VT_MSGINFO_SHIFT
;
5047 u16
*hash_list
= (u16
*)&msgbuf
[1];
5048 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5051 /* salt away the number of multicast addresses assigned
5052 * to this VF for later use to restore when the PF multi cast
5055 vf_data
->num_vf_mc_hashes
= n
;
5057 /* only up to 30 hash values supported */
5061 /* store the hashes for later use */
5062 for (i
= 0; i
< n
; i
++)
5063 vf_data
->vf_mc_hashes
[i
] = hash_list
[i
];
5065 /* Flush and reset the mta with the new values */
5066 igb_set_rx_mode(adapter
->netdev
);
5071 static void igb_restore_vf_multicasts(struct igb_adapter
*adapter
)
5073 struct e1000_hw
*hw
= &adapter
->hw
;
5074 struct vf_data_storage
*vf_data
;
5077 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
5078 u32 vmolr
= rd32(E1000_VMOLR(i
));
5079 vmolr
&= ~(E1000_VMOLR_ROMPE
| E1000_VMOLR_MPME
);
5081 vf_data
= &adapter
->vf_data
[i
];
5083 if ((vf_data
->num_vf_mc_hashes
> 30) ||
5084 (vf_data
->flags
& IGB_VF_FLAG_MULTI_PROMISC
)) {
5085 vmolr
|= E1000_VMOLR_MPME
;
5086 } else if (vf_data
->num_vf_mc_hashes
) {
5087 vmolr
|= E1000_VMOLR_ROMPE
;
5088 for (j
= 0; j
< vf_data
->num_vf_mc_hashes
; j
++)
5089 igb_mta_set(hw
, vf_data
->vf_mc_hashes
[j
]);
5091 wr32(E1000_VMOLR(i
), vmolr
);
5095 static void igb_clear_vf_vfta(struct igb_adapter
*adapter
, u32 vf
)
5097 struct e1000_hw
*hw
= &adapter
->hw
;
5098 u32 pool_mask
, reg
, vid
;
5101 pool_mask
= 1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
);
5103 /* Find the vlan filter for this id */
5104 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5105 reg
= rd32(E1000_VLVF(i
));
5107 /* remove the vf from the pool */
5110 /* if pool is empty then remove entry from vfta */
5111 if (!(reg
& E1000_VLVF_POOLSEL_MASK
) &&
5112 (reg
& E1000_VLVF_VLANID_ENABLE
)) {
5114 vid
= reg
& E1000_VLVF_VLANID_MASK
;
5115 igb_vfta_set(hw
, vid
, false);
5118 wr32(E1000_VLVF(i
), reg
);
5121 adapter
->vf_data
[vf
].vlans_enabled
= 0;
5124 static s32
igb_vlvf_set(struct igb_adapter
*adapter
, u32 vid
, bool add
, u32 vf
)
5126 struct e1000_hw
*hw
= &adapter
->hw
;
5129 /* The vlvf table only exists on 82576 hardware and newer */
5130 if (hw
->mac
.type
< e1000_82576
)
5133 /* we only need to do this if VMDq is enabled */
5134 if (!adapter
->vfs_allocated_count
)
5137 /* Find the vlan filter for this id */
5138 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5139 reg
= rd32(E1000_VLVF(i
));
5140 if ((reg
& E1000_VLVF_VLANID_ENABLE
) &&
5141 vid
== (reg
& E1000_VLVF_VLANID_MASK
))
5146 if (i
== E1000_VLVF_ARRAY_SIZE
) {
5147 /* Did not find a matching VLAN ID entry that was
5148 * enabled. Search for a free filter entry, i.e.
5149 * one without the enable bit set
5151 for (i
= 0; i
< E1000_VLVF_ARRAY_SIZE
; i
++) {
5152 reg
= rd32(E1000_VLVF(i
));
5153 if (!(reg
& E1000_VLVF_VLANID_ENABLE
))
5157 if (i
< E1000_VLVF_ARRAY_SIZE
) {
5158 /* Found an enabled/available entry */
5159 reg
|= 1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
);
5161 /* if !enabled we need to set this up in vfta */
5162 if (!(reg
& E1000_VLVF_VLANID_ENABLE
)) {
5163 /* add VID to filter table */
5164 igb_vfta_set(hw
, vid
, true);
5165 reg
|= E1000_VLVF_VLANID_ENABLE
;
5167 reg
&= ~E1000_VLVF_VLANID_MASK
;
5169 wr32(E1000_VLVF(i
), reg
);
5171 /* do not modify RLPML for PF devices */
5172 if (vf
>= adapter
->vfs_allocated_count
)
5175 if (!adapter
->vf_data
[vf
].vlans_enabled
) {
5177 reg
= rd32(E1000_VMOLR(vf
));
5178 size
= reg
& E1000_VMOLR_RLPML_MASK
;
5180 reg
&= ~E1000_VMOLR_RLPML_MASK
;
5182 wr32(E1000_VMOLR(vf
), reg
);
5185 adapter
->vf_data
[vf
].vlans_enabled
++;
5188 if (i
< E1000_VLVF_ARRAY_SIZE
) {
5189 /* remove vf from the pool */
5190 reg
&= ~(1 << (E1000_VLVF_POOLSEL_SHIFT
+ vf
));
5191 /* if pool is empty then remove entry from vfta */
5192 if (!(reg
& E1000_VLVF_POOLSEL_MASK
)) {
5194 igb_vfta_set(hw
, vid
, false);
5196 wr32(E1000_VLVF(i
), reg
);
5198 /* do not modify RLPML for PF devices */
5199 if (vf
>= adapter
->vfs_allocated_count
)
5202 adapter
->vf_data
[vf
].vlans_enabled
--;
5203 if (!adapter
->vf_data
[vf
].vlans_enabled
) {
5205 reg
= rd32(E1000_VMOLR(vf
));
5206 size
= reg
& E1000_VMOLR_RLPML_MASK
;
5208 reg
&= ~E1000_VMOLR_RLPML_MASK
;
5210 wr32(E1000_VMOLR(vf
), reg
);
5217 static void igb_set_vmvir(struct igb_adapter
*adapter
, u32 vid
, u32 vf
)
5219 struct e1000_hw
*hw
= &adapter
->hw
;
5222 wr32(E1000_VMVIR(vf
), (vid
| E1000_VMVIR_VLANA_DEFAULT
));
5224 wr32(E1000_VMVIR(vf
), 0);
5227 static int igb_ndo_set_vf_vlan(struct net_device
*netdev
,
5228 int vf
, u16 vlan
, u8 qos
)
5231 struct igb_adapter
*adapter
= netdev_priv(netdev
);
5233 if ((vf
>= adapter
->vfs_allocated_count
) || (vlan
> 4095) || (qos
> 7))
5236 err
= igb_vlvf_set(adapter
, vlan
, !!vlan
, vf
);
5239 igb_set_vmvir(adapter
, vlan
| (qos
<< VLAN_PRIO_SHIFT
), vf
);
5240 igb_set_vmolr(adapter
, vf
, !vlan
);
5241 adapter
->vf_data
[vf
].pf_vlan
= vlan
;
5242 adapter
->vf_data
[vf
].pf_qos
= qos
;
5243 dev_info(&adapter
->pdev
->dev
,
5244 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan
, qos
, vf
);
5245 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
5246 dev_warn(&adapter
->pdev
->dev
,
5247 "The VF VLAN has been set,"
5248 " but the PF device is not up.\n");
5249 dev_warn(&adapter
->pdev
->dev
,
5250 "Bring the PF device up before"
5251 " attempting to use the VF device.\n");
5254 igb_vlvf_set(adapter
, adapter
->vf_data
[vf
].pf_vlan
,
5256 igb_set_vmvir(adapter
, vlan
, vf
);
5257 igb_set_vmolr(adapter
, vf
, true);
5258 adapter
->vf_data
[vf
].pf_vlan
= 0;
5259 adapter
->vf_data
[vf
].pf_qos
= 0;
5265 static int igb_set_vf_vlan(struct igb_adapter
*adapter
, u32
*msgbuf
, u32 vf
)
5267 int add
= (msgbuf
[0] & E1000_VT_MSGINFO_MASK
) >> E1000_VT_MSGINFO_SHIFT
;
5268 int vid
= (msgbuf
[1] & E1000_VLVF_VLANID_MASK
);
5270 return igb_vlvf_set(adapter
, vid
, add
, vf
);
5273 static inline void igb_vf_reset(struct igb_adapter
*adapter
, u32 vf
)
5275 /* clear flags - except flag that indicates PF has set the MAC */
5276 adapter
->vf_data
[vf
].flags
&= IGB_VF_FLAG_PF_SET_MAC
;
5277 adapter
->vf_data
[vf
].last_nack
= jiffies
;
5279 /* reset offloads to defaults */
5280 igb_set_vmolr(adapter
, vf
, true);
5282 /* reset vlans for device */
5283 igb_clear_vf_vfta(adapter
, vf
);
5284 if (adapter
->vf_data
[vf
].pf_vlan
)
5285 igb_ndo_set_vf_vlan(adapter
->netdev
, vf
,
5286 adapter
->vf_data
[vf
].pf_vlan
,
5287 adapter
->vf_data
[vf
].pf_qos
);
5289 igb_clear_vf_vfta(adapter
, vf
);
5291 /* reset multicast table array for vf */
5292 adapter
->vf_data
[vf
].num_vf_mc_hashes
= 0;
5294 /* Flush and reset the mta with the new values */
5295 igb_set_rx_mode(adapter
->netdev
);
5298 static void igb_vf_reset_event(struct igb_adapter
*adapter
, u32 vf
)
5300 unsigned char *vf_mac
= adapter
->vf_data
[vf
].vf_mac_addresses
;
5302 /* generate a new mac address as we were hotplug removed/added */
5303 if (!(adapter
->vf_data
[vf
].flags
& IGB_VF_FLAG_PF_SET_MAC
))
5304 random_ether_addr(vf_mac
);
5306 /* process remaining reset events */
5307 igb_vf_reset(adapter
, vf
);
5310 static void igb_vf_reset_msg(struct igb_adapter
*adapter
, u32 vf
)
5312 struct e1000_hw
*hw
= &adapter
->hw
;
5313 unsigned char *vf_mac
= adapter
->vf_data
[vf
].vf_mac_addresses
;
5314 int rar_entry
= hw
->mac
.rar_entry_count
- (vf
+ 1);
5316 u8
*addr
= (u8
*)(&msgbuf
[1]);
5318 /* process all the same items cleared in a function level reset */
5319 igb_vf_reset(adapter
, vf
);
5321 /* set vf mac address */
5322 igb_rar_set_qsel(adapter
, vf_mac
, rar_entry
, vf
);
5324 /* enable transmit and receive for vf */
5325 reg
= rd32(E1000_VFTE
);
5326 wr32(E1000_VFTE
, reg
| (1 << vf
));
5327 reg
= rd32(E1000_VFRE
);
5328 wr32(E1000_VFRE
, reg
| (1 << vf
));
5330 adapter
->vf_data
[vf
].flags
|= IGB_VF_FLAG_CTS
;
5332 /* reply to reset with ack and vf mac address */
5333 msgbuf
[0] = E1000_VF_RESET
| E1000_VT_MSGTYPE_ACK
;
5334 memcpy(addr
, vf_mac
, 6);
5335 igb_write_mbx(hw
, msgbuf
, 3, vf
);
5338 static int igb_set_vf_mac_addr(struct igb_adapter
*adapter
, u32
*msg
, int vf
)
5341 * The VF MAC Address is stored in a packed array of bytes
5342 * starting at the second 32 bit word of the msg array
5344 unsigned char *addr
= (char *)&msg
[1];
5347 if (is_valid_ether_addr(addr
))
5348 err
= igb_set_vf_mac(adapter
, vf
, addr
);
5353 static void igb_rcv_ack_from_vf(struct igb_adapter
*adapter
, u32 vf
)
5355 struct e1000_hw
*hw
= &adapter
->hw
;
5356 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5357 u32 msg
= E1000_VT_MSGTYPE_NACK
;
5359 /* if device isn't clear to send it shouldn't be reading either */
5360 if (!(vf_data
->flags
& IGB_VF_FLAG_CTS
) &&
5361 time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
))) {
5362 igb_write_mbx(hw
, &msg
, 1, vf
);
5363 vf_data
->last_nack
= jiffies
;
5367 static void igb_rcv_msg_from_vf(struct igb_adapter
*adapter
, u32 vf
)
5369 struct pci_dev
*pdev
= adapter
->pdev
;
5370 u32 msgbuf
[E1000_VFMAILBOX_SIZE
];
5371 struct e1000_hw
*hw
= &adapter
->hw
;
5372 struct vf_data_storage
*vf_data
= &adapter
->vf_data
[vf
];
5375 retval
= igb_read_mbx(hw
, msgbuf
, E1000_VFMAILBOX_SIZE
, vf
);
5378 /* if receive failed revoke VF CTS stats and restart init */
5379 dev_err(&pdev
->dev
, "Error receiving message from VF\n");
5380 vf_data
->flags
&= ~IGB_VF_FLAG_CTS
;
5381 if (!time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
)))
5386 /* this is a message we already processed, do nothing */
5387 if (msgbuf
[0] & (E1000_VT_MSGTYPE_ACK
| E1000_VT_MSGTYPE_NACK
))
5391 * until the vf completes a reset it should not be
5392 * allowed to start any configuration.
5395 if (msgbuf
[0] == E1000_VF_RESET
) {
5396 igb_vf_reset_msg(adapter
, vf
);
5400 if (!(vf_data
->flags
& IGB_VF_FLAG_CTS
)) {
5401 if (!time_after(jiffies
, vf_data
->last_nack
+ (2 * HZ
)))
5407 switch ((msgbuf
[0] & 0xFFFF)) {
5408 case E1000_VF_SET_MAC_ADDR
:
5410 if (!(vf_data
->flags
& IGB_VF_FLAG_PF_SET_MAC
))
5411 retval
= igb_set_vf_mac_addr(adapter
, msgbuf
, vf
);
5413 dev_warn(&pdev
->dev
,
5414 "VF %d attempted to override administratively "
5415 "set MAC address\nReload the VF driver to "
5416 "resume operations\n", vf
);
5418 case E1000_VF_SET_PROMISC
:
5419 retval
= igb_set_vf_promisc(adapter
, msgbuf
, vf
);
5421 case E1000_VF_SET_MULTICAST
:
5422 retval
= igb_set_vf_multicasts(adapter
, msgbuf
, vf
);
5424 case E1000_VF_SET_LPE
:
5425 retval
= igb_set_vf_rlpml(adapter
, msgbuf
[1], vf
);
5427 case E1000_VF_SET_VLAN
:
5429 if (vf_data
->pf_vlan
)
5430 dev_warn(&pdev
->dev
,
5431 "VF %d attempted to override administratively "
5432 "set VLAN tag\nReload the VF driver to "
5433 "resume operations\n", vf
);
5435 retval
= igb_set_vf_vlan(adapter
, msgbuf
, vf
);
5438 dev_err(&pdev
->dev
, "Unhandled Msg %08x\n", msgbuf
[0]);
5443 msgbuf
[0] |= E1000_VT_MSGTYPE_CTS
;
5445 /* notify the VF of the results of what it sent us */
5447 msgbuf
[0] |= E1000_VT_MSGTYPE_NACK
;
5449 msgbuf
[0] |= E1000_VT_MSGTYPE_ACK
;
5451 igb_write_mbx(hw
, msgbuf
, 1, vf
);
5454 static void igb_msg_task(struct igb_adapter
*adapter
)
5456 struct e1000_hw
*hw
= &adapter
->hw
;
5459 for (vf
= 0; vf
< adapter
->vfs_allocated_count
; vf
++) {
5460 /* process any reset requests */
5461 if (!igb_check_for_rst(hw
, vf
))
5462 igb_vf_reset_event(adapter
, vf
);
5464 /* process any messages pending */
5465 if (!igb_check_for_msg(hw
, vf
))
5466 igb_rcv_msg_from_vf(adapter
, vf
);
5468 /* process any acks */
5469 if (!igb_check_for_ack(hw
, vf
))
5470 igb_rcv_ack_from_vf(adapter
, vf
);
5475 * igb_set_uta - Set unicast filter table address
5476 * @adapter: board private structure
5478 * The unicast table address is a register array of 32-bit registers.
5479 * The table is meant to be used in a way similar to how the MTA is used
5480 * however due to certain limitations in the hardware it is necessary to
5481 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5482 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5484 static void igb_set_uta(struct igb_adapter
*adapter
)
5486 struct e1000_hw
*hw
= &adapter
->hw
;
5489 /* The UTA table only exists on 82576 hardware and newer */
5490 if (hw
->mac
.type
< e1000_82576
)
5493 /* we only need to do this if VMDq is enabled */
5494 if (!adapter
->vfs_allocated_count
)
5497 for (i
= 0; i
< hw
->mac
.uta_reg_count
; i
++)
5498 array_wr32(E1000_UTA
, i
, ~0);
5502 * igb_intr_msi - Interrupt Handler
5503 * @irq: interrupt number
5504 * @data: pointer to a network interface device structure
5506 static irqreturn_t
igb_intr_msi(int irq
, void *data
)
5508 struct igb_adapter
*adapter
= data
;
5509 struct igb_q_vector
*q_vector
= adapter
->q_vector
[0];
5510 struct e1000_hw
*hw
= &adapter
->hw
;
5511 /* read ICR disables interrupts using IAM */
5512 u32 icr
= rd32(E1000_ICR
);
5514 igb_write_itr(q_vector
);
5516 if (icr
& E1000_ICR_DRSTA
)
5517 schedule_work(&adapter
->reset_task
);
5519 if (icr
& E1000_ICR_DOUTSYNC
) {
5520 /* HW is reporting DMA is out of sync */
5521 adapter
->stats
.doosync
++;
5524 if (icr
& (E1000_ICR_RXSEQ
| E1000_ICR_LSC
)) {
5525 hw
->mac
.get_link_status
= 1;
5526 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
5527 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
5530 napi_schedule(&q_vector
->napi
);
5536 * igb_intr - Legacy Interrupt Handler
5537 * @irq: interrupt number
5538 * @data: pointer to a network interface device structure
5540 static irqreturn_t
igb_intr(int irq
, void *data
)
5542 struct igb_adapter
*adapter
= data
;
5543 struct igb_q_vector
*q_vector
= adapter
->q_vector
[0];
5544 struct e1000_hw
*hw
= &adapter
->hw
;
5545 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5546 * need for the IMC write */
5547 u32 icr
= rd32(E1000_ICR
);
5549 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5550 * not set, then the adapter didn't send an interrupt */
5551 if (!(icr
& E1000_ICR_INT_ASSERTED
))
5554 igb_write_itr(q_vector
);
5556 if (icr
& E1000_ICR_DRSTA
)
5557 schedule_work(&adapter
->reset_task
);
5559 if (icr
& E1000_ICR_DOUTSYNC
) {
5560 /* HW is reporting DMA is out of sync */
5561 adapter
->stats
.doosync
++;
5564 if (icr
& (E1000_ICR_RXSEQ
| E1000_ICR_LSC
)) {
5565 hw
->mac
.get_link_status
= 1;
5566 /* guard against interrupt when we're going down */
5567 if (!test_bit(__IGB_DOWN
, &adapter
->state
))
5568 mod_timer(&adapter
->watchdog_timer
, jiffies
+ 1);
5571 napi_schedule(&q_vector
->napi
);
5576 void igb_ring_irq_enable(struct igb_q_vector
*q_vector
)
5578 struct igb_adapter
*adapter
= q_vector
->adapter
;
5579 struct e1000_hw
*hw
= &adapter
->hw
;
5581 if ((q_vector
->rx
.ring
&& (adapter
->rx_itr_setting
& 3)) ||
5582 (!q_vector
->rx
.ring
&& (adapter
->tx_itr_setting
& 3))) {
5583 if ((adapter
->num_q_vectors
== 1) && !adapter
->vf_data
)
5584 igb_set_itr(q_vector
);
5586 igb_update_ring_itr(q_vector
);
5589 if (!test_bit(__IGB_DOWN
, &adapter
->state
)) {
5590 if (adapter
->msix_entries
)
5591 wr32(E1000_EIMS
, q_vector
->eims_value
);
5593 igb_irq_enable(adapter
);
5598 * igb_poll - NAPI Rx polling callback
5599 * @napi: napi polling structure
5600 * @budget: count of how many packets we should handle
5602 static int igb_poll(struct napi_struct
*napi
, int budget
)
5604 struct igb_q_vector
*q_vector
= container_of(napi
,
5605 struct igb_q_vector
,
5607 bool clean_complete
= true;
5609 #ifdef CONFIG_IGB_DCA
5610 if (q_vector
->adapter
->flags
& IGB_FLAG_DCA_ENABLED
)
5611 igb_update_dca(q_vector
);
5613 if (q_vector
->tx
.ring
)
5614 clean_complete
= igb_clean_tx_irq(q_vector
);
5616 if (q_vector
->rx
.ring
)
5617 clean_complete
&= igb_clean_rx_irq(q_vector
, budget
);
5619 /* If all work not completed, return budget and keep polling */
5620 if (!clean_complete
)
5623 /* If not enough Rx work done, exit the polling mode */
5624 napi_complete(napi
);
5625 igb_ring_irq_enable(q_vector
);
5631 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5632 * @adapter: board private structure
5633 * @shhwtstamps: timestamp structure to update
5634 * @regval: unsigned 64bit system time value.
5636 * We need to convert the system time value stored in the RX/TXSTMP registers
5637 * into a hwtstamp which can be used by the upper level timestamping functions
5639 static void igb_systim_to_hwtstamp(struct igb_adapter
*adapter
,
5640 struct skb_shared_hwtstamps
*shhwtstamps
,
5646 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5647 * 24 to match clock shift we setup earlier.
5649 if (adapter
->hw
.mac
.type
>= e1000_82580
)
5650 regval
<<= IGB_82580_TSYNC_SHIFT
;
5652 ns
= timecounter_cyc2time(&adapter
->clock
, regval
);
5653 timecompare_update(&adapter
->compare
, ns
);
5654 memset(shhwtstamps
, 0, sizeof(struct skb_shared_hwtstamps
));
5655 shhwtstamps
->hwtstamp
= ns_to_ktime(ns
);
5656 shhwtstamps
->syststamp
= timecompare_transform(&adapter
->compare
, ns
);
5660 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5661 * @q_vector: pointer to q_vector containing needed info
5662 * @buffer: pointer to igb_tx_buffer structure
5664 * If we were asked to do hardware stamping and such a time stamp is
5665 * available, then it must have been for this skb here because we only
5666 * allow only one such packet into the queue.
5668 static void igb_tx_hwtstamp(struct igb_q_vector
*q_vector
,
5669 struct igb_tx_buffer
*buffer_info
)
5671 struct igb_adapter
*adapter
= q_vector
->adapter
;
5672 struct e1000_hw
*hw
= &adapter
->hw
;
5673 struct skb_shared_hwtstamps shhwtstamps
;
5676 /* if skb does not support hw timestamp or TX stamp not valid exit */
5677 if (likely(!(buffer_info
->tx_flags
& IGB_TX_FLAGS_TSTAMP
)) ||
5678 !(rd32(E1000_TSYNCTXCTL
) & E1000_TSYNCTXCTL_VALID
))
5681 regval
= rd32(E1000_TXSTMPL
);
5682 regval
|= (u64
)rd32(E1000_TXSTMPH
) << 32;
5684 igb_systim_to_hwtstamp(adapter
, &shhwtstamps
, regval
);
5685 skb_tstamp_tx(buffer_info
->skb
, &shhwtstamps
);
5689 * igb_clean_tx_irq - Reclaim resources after transmit completes
5690 * @q_vector: pointer to q_vector containing needed info
5691 * returns true if ring is completely cleaned
5693 static bool igb_clean_tx_irq(struct igb_q_vector
*q_vector
)
5695 struct igb_adapter
*adapter
= q_vector
->adapter
;
5696 struct igb_ring
*tx_ring
= q_vector
->tx
.ring
;
5697 struct igb_tx_buffer
*tx_buffer
;
5698 union e1000_adv_tx_desc
*tx_desc
, *eop_desc
;
5699 unsigned int total_bytes
= 0, total_packets
= 0;
5700 unsigned int budget
= q_vector
->tx
.work_limit
;
5701 unsigned int i
= tx_ring
->next_to_clean
;
5703 if (test_bit(__IGB_DOWN
, &adapter
->state
))
5706 tx_buffer
= &tx_ring
->tx_buffer_info
[i
];
5707 tx_desc
= IGB_TX_DESC(tx_ring
, i
);
5708 i
-= tx_ring
->count
;
5710 for (; budget
; budget
--) {
5711 eop_desc
= tx_buffer
->next_to_watch
;
5713 /* prevent any other reads prior to eop_desc */
5716 /* if next_to_watch is not set then there is no work pending */
5720 /* if DD is not set pending work has not been completed */
5721 if (!(eop_desc
->wb
.status
& cpu_to_le32(E1000_TXD_STAT_DD
)))
5724 /* clear next_to_watch to prevent false hangs */
5725 tx_buffer
->next_to_watch
= NULL
;
5727 /* update the statistics for this packet */
5728 total_bytes
+= tx_buffer
->bytecount
;
5729 total_packets
+= tx_buffer
->gso_segs
;
5731 /* retrieve hardware timestamp */
5732 igb_tx_hwtstamp(q_vector
, tx_buffer
);
5735 dev_kfree_skb_any(tx_buffer
->skb
);
5736 tx_buffer
->skb
= NULL
;
5738 /* unmap skb header data */
5739 dma_unmap_single(tx_ring
->dev
,
5744 /* clear last DMA location and unmap remaining buffers */
5745 while (tx_desc
!= eop_desc
) {
5752 i
-= tx_ring
->count
;
5753 tx_buffer
= tx_ring
->tx_buffer_info
;
5754 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
5757 /* unmap any remaining paged data */
5758 if (tx_buffer
->dma
) {
5759 dma_unmap_page(tx_ring
->dev
,
5766 /* clear last DMA location */
5769 /* move us one more past the eop_desc for start of next pkt */
5774 i
-= tx_ring
->count
;
5775 tx_buffer
= tx_ring
->tx_buffer_info
;
5776 tx_desc
= IGB_TX_DESC(tx_ring
, 0);
5780 i
+= tx_ring
->count
;
5781 tx_ring
->next_to_clean
= i
;
5782 u64_stats_update_begin(&tx_ring
->tx_syncp
);
5783 tx_ring
->tx_stats
.bytes
+= total_bytes
;
5784 tx_ring
->tx_stats
.packets
+= total_packets
;
5785 u64_stats_update_end(&tx_ring
->tx_syncp
);
5786 q_vector
->tx
.total_bytes
+= total_bytes
;
5787 q_vector
->tx
.total_packets
+= total_packets
;
5789 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
)) {
5790 struct e1000_hw
*hw
= &adapter
->hw
;
5792 eop_desc
= tx_buffer
->next_to_watch
;
5794 /* Detect a transmit hang in hardware, this serializes the
5795 * check with the clearing of time_stamp and movement of i */
5796 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG
, &tx_ring
->flags
);
5798 time_after(jiffies
, tx_buffer
->time_stamp
+
5799 (adapter
->tx_timeout_factor
* HZ
)) &&
5800 !(rd32(E1000_STATUS
) & E1000_STATUS_TXOFF
)) {
5802 /* detected Tx unit hang */
5803 dev_err(tx_ring
->dev
,
5804 "Detected Tx Unit Hang\n"
5808 " next_to_use <%x>\n"
5809 " next_to_clean <%x>\n"
5810 "buffer_info[next_to_clean]\n"
5811 " time_stamp <%lx>\n"
5812 " next_to_watch <%p>\n"
5814 " desc.status <%x>\n",
5815 tx_ring
->queue_index
,
5816 rd32(E1000_TDH(tx_ring
->reg_idx
)),
5817 readl(tx_ring
->tail
),
5818 tx_ring
->next_to_use
,
5819 tx_ring
->next_to_clean
,
5820 tx_buffer
->time_stamp
,
5823 eop_desc
->wb
.status
);
5824 netif_stop_subqueue(tx_ring
->netdev
,
5825 tx_ring
->queue_index
);
5827 /* we are about to reset, no point in enabling stuff */
5832 if (unlikely(total_packets
&&
5833 netif_carrier_ok(tx_ring
->netdev
) &&
5834 igb_desc_unused(tx_ring
) >= IGB_TX_QUEUE_WAKE
)) {
5835 /* Make sure that anybody stopping the queue after this
5836 * sees the new next_to_clean.
5839 if (__netif_subqueue_stopped(tx_ring
->netdev
,
5840 tx_ring
->queue_index
) &&
5841 !(test_bit(__IGB_DOWN
, &adapter
->state
))) {
5842 netif_wake_subqueue(tx_ring
->netdev
,
5843 tx_ring
->queue_index
);
5845 u64_stats_update_begin(&tx_ring
->tx_syncp
);
5846 tx_ring
->tx_stats
.restart_queue
++;
5847 u64_stats_update_end(&tx_ring
->tx_syncp
);
5854 static inline void igb_rx_checksum(struct igb_ring
*ring
,
5855 union e1000_adv_rx_desc
*rx_desc
,
5856 struct sk_buff
*skb
)
5858 skb_checksum_none_assert(skb
);
5860 /* Ignore Checksum bit is set */
5861 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_IXSM
))
5864 /* Rx checksum disabled via ethtool */
5865 if (!(ring
->netdev
->features
& NETIF_F_RXCSUM
))
5868 /* TCP/UDP checksum error bit is set */
5869 if (igb_test_staterr(rx_desc
,
5870 E1000_RXDEXT_STATERR_TCPE
|
5871 E1000_RXDEXT_STATERR_IPE
)) {
5873 * work around errata with sctp packets where the TCPE aka
5874 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5875 * packets, (aka let the stack check the crc32c)
5877 if (!((skb
->len
== 60) &&
5878 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM
, &ring
->flags
))) {
5879 u64_stats_update_begin(&ring
->rx_syncp
);
5880 ring
->rx_stats
.csum_err
++;
5881 u64_stats_update_end(&ring
->rx_syncp
);
5883 /* let the stack verify checksum errors */
5886 /* It must be a TCP or UDP packet with a valid checksum */
5887 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_TCPCS
|
5888 E1000_RXD_STAT_UDPCS
))
5889 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
5891 dev_dbg(ring
->dev
, "cksum success: bits %08X\n",
5892 le32_to_cpu(rx_desc
->wb
.upper
.status_error
));
5895 static inline void igb_rx_hash(struct igb_ring
*ring
,
5896 union e1000_adv_rx_desc
*rx_desc
,
5897 struct sk_buff
*skb
)
5899 if (ring
->netdev
->features
& NETIF_F_RXHASH
)
5900 skb
->rxhash
= le32_to_cpu(rx_desc
->wb
.lower
.hi_dword
.rss
);
5903 static void igb_rx_hwtstamp(struct igb_q_vector
*q_vector
,
5904 union e1000_adv_rx_desc
*rx_desc
,
5905 struct sk_buff
*skb
)
5907 struct igb_adapter
*adapter
= q_vector
->adapter
;
5908 struct e1000_hw
*hw
= &adapter
->hw
;
5911 if (!igb_test_staterr(rx_desc
, E1000_RXDADV_STAT_TSIP
|
5912 E1000_RXDADV_STAT_TS
))
5916 * If this bit is set, then the RX registers contain the time stamp. No
5917 * other packet will be time stamped until we read these registers, so
5918 * read the registers to make them available again. Because only one
5919 * packet can be time stamped at a time, we know that the register
5920 * values must belong to this one here and therefore we don't need to
5921 * compare any of the additional attributes stored for it.
5923 * If nothing went wrong, then it should have a shared tx_flags that we
5924 * can turn into a skb_shared_hwtstamps.
5926 if (igb_test_staterr(rx_desc
, E1000_RXDADV_STAT_TSIP
)) {
5927 u32
*stamp
= (u32
*)skb
->data
;
5928 regval
= le32_to_cpu(*(stamp
+ 2));
5929 regval
|= (u64
)le32_to_cpu(*(stamp
+ 3)) << 32;
5930 skb_pull(skb
, IGB_TS_HDR_LEN
);
5932 if(!(rd32(E1000_TSYNCRXCTL
) & E1000_TSYNCRXCTL_VALID
))
5935 regval
= rd32(E1000_RXSTMPL
);
5936 regval
|= (u64
)rd32(E1000_RXSTMPH
) << 32;
5939 igb_systim_to_hwtstamp(adapter
, skb_hwtstamps(skb
), regval
);
5942 static void igb_rx_vlan(struct igb_ring
*ring
,
5943 union e1000_adv_rx_desc
*rx_desc
,
5944 struct sk_buff
*skb
)
5946 if (igb_test_staterr(rx_desc
, E1000_RXD_STAT_VP
)) {
5948 if (igb_test_staterr(rx_desc
, E1000_RXDEXT_STATERR_LB
) &&
5949 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP
, &ring
->flags
))
5950 vid
= be16_to_cpu(rx_desc
->wb
.upper
.vlan
);
5952 vid
= le16_to_cpu(rx_desc
->wb
.upper
.vlan
);
5954 __vlan_hwaccel_put_tag(skb
, vid
);
5958 static inline u16
igb_get_hlen(union e1000_adv_rx_desc
*rx_desc
)
5960 /* HW will not DMA in data larger than the given buffer, even if it
5961 * parses the (NFS, of course) header to be larger. In that case, it
5962 * fills the header buffer and spills the rest into the page.
5964 u16 hlen
= (le16_to_cpu(rx_desc
->wb
.lower
.lo_dword
.hdr_info
) &
5965 E1000_RXDADV_HDRBUFLEN_MASK
) >> E1000_RXDADV_HDRBUFLEN_SHIFT
;
5966 if (hlen
> IGB_RX_HDR_LEN
)
5967 hlen
= IGB_RX_HDR_LEN
;
5971 static bool igb_clean_rx_irq(struct igb_q_vector
*q_vector
, int budget
)
5973 struct igb_ring
*rx_ring
= q_vector
->rx
.ring
;
5974 union e1000_adv_rx_desc
*rx_desc
;
5975 const int current_node
= numa_node_id();
5976 unsigned int total_bytes
= 0, total_packets
= 0;
5977 u16 cleaned_count
= igb_desc_unused(rx_ring
);
5978 u16 i
= rx_ring
->next_to_clean
;
5980 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
5982 while (igb_test_staterr(rx_desc
, E1000_RXD_STAT_DD
)) {
5983 struct igb_rx_buffer
*buffer_info
= &rx_ring
->rx_buffer_info
[i
];
5984 struct sk_buff
*skb
= buffer_info
->skb
;
5985 union e1000_adv_rx_desc
*next_rxd
;
5987 buffer_info
->skb
= NULL
;
5988 prefetch(skb
->data
);
5991 if (i
== rx_ring
->count
)
5994 next_rxd
= IGB_RX_DESC(rx_ring
, i
);
5998 * This memory barrier is needed to keep us from reading
5999 * any other fields out of the rx_desc until we know the
6000 * RXD_STAT_DD bit is set
6004 if (!skb_is_nonlinear(skb
)) {
6005 __skb_put(skb
, igb_get_hlen(rx_desc
));
6006 dma_unmap_single(rx_ring
->dev
, buffer_info
->dma
,
6009 buffer_info
->dma
= 0;
6012 if (rx_desc
->wb
.upper
.length
) {
6013 u16 length
= le16_to_cpu(rx_desc
->wb
.upper
.length
);
6015 skb_fill_page_desc(skb
, skb_shinfo(skb
)->nr_frags
,
6017 buffer_info
->page_offset
,
6021 skb
->data_len
+= length
;
6022 skb
->truesize
+= PAGE_SIZE
/ 2;
6024 if ((page_count(buffer_info
->page
) != 1) ||
6025 (page_to_nid(buffer_info
->page
) != current_node
))
6026 buffer_info
->page
= NULL
;
6028 get_page(buffer_info
->page
);
6030 dma_unmap_page(rx_ring
->dev
, buffer_info
->page_dma
,
6031 PAGE_SIZE
/ 2, DMA_FROM_DEVICE
);
6032 buffer_info
->page_dma
= 0;
6035 if (!igb_test_staterr(rx_desc
, E1000_RXD_STAT_EOP
)) {
6036 struct igb_rx_buffer
*next_buffer
;
6037 next_buffer
= &rx_ring
->rx_buffer_info
[i
];
6038 buffer_info
->skb
= next_buffer
->skb
;
6039 buffer_info
->dma
= next_buffer
->dma
;
6040 next_buffer
->skb
= skb
;
6041 next_buffer
->dma
= 0;
6045 if (igb_test_staterr(rx_desc
,
6046 E1000_RXDEXT_ERR_FRAME_ERR_MASK
)) {
6047 dev_kfree_skb_any(skb
);
6051 igb_rx_hwtstamp(q_vector
, rx_desc
, skb
);
6052 igb_rx_hash(rx_ring
, rx_desc
, skb
);
6053 igb_rx_checksum(rx_ring
, rx_desc
, skb
);
6054 igb_rx_vlan(rx_ring
, rx_desc
, skb
);
6056 total_bytes
+= skb
->len
;
6059 skb
->protocol
= eth_type_trans(skb
, rx_ring
->netdev
);
6061 napi_gro_receive(&q_vector
->napi
, skb
);
6069 /* return some buffers to hardware, one at a time is too slow */
6070 if (cleaned_count
>= IGB_RX_BUFFER_WRITE
) {
6071 igb_alloc_rx_buffers(rx_ring
, cleaned_count
);
6075 /* use prefetched values */
6079 rx_ring
->next_to_clean
= i
;
6080 u64_stats_update_begin(&rx_ring
->rx_syncp
);
6081 rx_ring
->rx_stats
.packets
+= total_packets
;
6082 rx_ring
->rx_stats
.bytes
+= total_bytes
;
6083 u64_stats_update_end(&rx_ring
->rx_syncp
);
6084 q_vector
->rx
.total_packets
+= total_packets
;
6085 q_vector
->rx
.total_bytes
+= total_bytes
;
6088 igb_alloc_rx_buffers(rx_ring
, cleaned_count
);
6093 static bool igb_alloc_mapped_skb(struct igb_ring
*rx_ring
,
6094 struct igb_rx_buffer
*bi
)
6096 struct sk_buff
*skb
= bi
->skb
;
6097 dma_addr_t dma
= bi
->dma
;
6103 skb
= netdev_alloc_skb_ip_align(rx_ring
->netdev
,
6107 rx_ring
->rx_stats
.alloc_failed
++;
6111 /* initialize skb for ring */
6112 skb_record_rx_queue(skb
, rx_ring
->queue_index
);
6115 dma
= dma_map_single(rx_ring
->dev
, skb
->data
,
6116 IGB_RX_HDR_LEN
, DMA_FROM_DEVICE
);
6118 if (dma_mapping_error(rx_ring
->dev
, dma
)) {
6119 rx_ring
->rx_stats
.alloc_failed
++;
6127 static bool igb_alloc_mapped_page(struct igb_ring
*rx_ring
,
6128 struct igb_rx_buffer
*bi
)
6130 struct page
*page
= bi
->page
;
6131 dma_addr_t page_dma
= bi
->page_dma
;
6132 unsigned int page_offset
= bi
->page_offset
^ (PAGE_SIZE
/ 2);
6138 page
= alloc_page(GFP_ATOMIC
| __GFP_COLD
);
6140 if (unlikely(!page
)) {
6141 rx_ring
->rx_stats
.alloc_failed
++;
6146 page_dma
= dma_map_page(rx_ring
->dev
, page
,
6147 page_offset
, PAGE_SIZE
/ 2,
6150 if (dma_mapping_error(rx_ring
->dev
, page_dma
)) {
6151 rx_ring
->rx_stats
.alloc_failed
++;
6155 bi
->page_dma
= page_dma
;
6156 bi
->page_offset
= page_offset
;
6161 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6162 * @adapter: address of board private structure
6164 void igb_alloc_rx_buffers(struct igb_ring
*rx_ring
, u16 cleaned_count
)
6166 union e1000_adv_rx_desc
*rx_desc
;
6167 struct igb_rx_buffer
*bi
;
6168 u16 i
= rx_ring
->next_to_use
;
6170 rx_desc
= IGB_RX_DESC(rx_ring
, i
);
6171 bi
= &rx_ring
->rx_buffer_info
[i
];
6172 i
-= rx_ring
->count
;
6174 while (cleaned_count
--) {
6175 if (!igb_alloc_mapped_skb(rx_ring
, bi
))
6178 /* Refresh the desc even if buffer_addrs didn't change
6179 * because each write-back erases this info. */
6180 rx_desc
->read
.hdr_addr
= cpu_to_le64(bi
->dma
);
6182 if (!igb_alloc_mapped_page(rx_ring
, bi
))
6185 rx_desc
->read
.pkt_addr
= cpu_to_le64(bi
->page_dma
);
6191 rx_desc
= IGB_RX_DESC(rx_ring
, 0);
6192 bi
= rx_ring
->rx_buffer_info
;
6193 i
-= rx_ring
->count
;
6196 /* clear the hdr_addr for the next_to_use descriptor */
6197 rx_desc
->read
.hdr_addr
= 0;
6200 i
+= rx_ring
->count
;
6202 if (rx_ring
->next_to_use
!= i
) {
6203 rx_ring
->next_to_use
= i
;
6205 /* Force memory writes to complete before letting h/w
6206 * know there are new descriptors to fetch. (Only
6207 * applicable for weak-ordered memory model archs,
6208 * such as IA-64). */
6210 writel(i
, rx_ring
->tail
);
6220 static int igb_mii_ioctl(struct net_device
*netdev
, struct ifreq
*ifr
, int cmd
)
6222 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6223 struct mii_ioctl_data
*data
= if_mii(ifr
);
6225 if (adapter
->hw
.phy
.media_type
!= e1000_media_type_copper
)
6230 data
->phy_id
= adapter
->hw
.phy
.addr
;
6233 if (igb_read_phy_reg(&adapter
->hw
, data
->reg_num
& 0x1F,
6245 * igb_hwtstamp_ioctl - control hardware time stamping
6250 * Outgoing time stamping can be enabled and disabled. Play nice and
6251 * disable it when requested, although it shouldn't case any overhead
6252 * when no packet needs it. At most one packet in the queue may be
6253 * marked for time stamping, otherwise it would be impossible to tell
6254 * for sure to which packet the hardware time stamp belongs.
6256 * Incoming time stamping has to be configured via the hardware
6257 * filters. Not all combinations are supported, in particular event
6258 * type has to be specified. Matching the kind of event packet is
6259 * not supported, with the exception of "all V2 events regardless of
6263 static int igb_hwtstamp_ioctl(struct net_device
*netdev
,
6264 struct ifreq
*ifr
, int cmd
)
6266 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6267 struct e1000_hw
*hw
= &adapter
->hw
;
6268 struct hwtstamp_config config
;
6269 u32 tsync_tx_ctl
= E1000_TSYNCTXCTL_ENABLED
;
6270 u32 tsync_rx_ctl
= E1000_TSYNCRXCTL_ENABLED
;
6271 u32 tsync_rx_cfg
= 0;
6276 if (copy_from_user(&config
, ifr
->ifr_data
, sizeof(config
)))
6279 /* reserved for future extensions */
6283 switch (config
.tx_type
) {
6284 case HWTSTAMP_TX_OFF
:
6286 case HWTSTAMP_TX_ON
:
6292 switch (config
.rx_filter
) {
6293 case HWTSTAMP_FILTER_NONE
:
6296 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT
:
6297 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT
:
6298 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT
:
6299 case HWTSTAMP_FILTER_ALL
:
6301 * register TSYNCRXCFG must be set, therefore it is not
6302 * possible to time stamp both Sync and Delay_Req messages
6303 * => fall back to time stamping all packets
6305 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_ALL
;
6306 config
.rx_filter
= HWTSTAMP_FILTER_ALL
;
6308 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC
:
6309 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L4_V1
;
6310 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE
;
6313 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ
:
6314 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L4_V1
;
6315 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE
;
6318 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC
:
6319 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC
:
6320 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L2_L4_V2
;
6321 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE
;
6324 config
.rx_filter
= HWTSTAMP_FILTER_SOME
;
6326 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ
:
6327 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ
:
6328 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_L2_L4_V2
;
6329 tsync_rx_cfg
= E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE
;
6332 config
.rx_filter
= HWTSTAMP_FILTER_SOME
;
6334 case HWTSTAMP_FILTER_PTP_V2_EVENT
:
6335 case HWTSTAMP_FILTER_PTP_V2_SYNC
:
6336 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ
:
6337 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_EVENT_V2
;
6338 config
.rx_filter
= HWTSTAMP_FILTER_PTP_V2_EVENT
;
6346 if (hw
->mac
.type
== e1000_82575
) {
6347 if (tsync_rx_ctl
| tsync_tx_ctl
)
6353 * Per-packet timestamping only works if all packets are
6354 * timestamped, so enable timestamping in all packets as
6355 * long as one rx filter was configured.
6357 if ((hw
->mac
.type
>= e1000_82580
) && tsync_rx_ctl
) {
6358 tsync_rx_ctl
= E1000_TSYNCRXCTL_ENABLED
;
6359 tsync_rx_ctl
|= E1000_TSYNCRXCTL_TYPE_ALL
;
6362 /* enable/disable TX */
6363 regval
= rd32(E1000_TSYNCTXCTL
);
6364 regval
&= ~E1000_TSYNCTXCTL_ENABLED
;
6365 regval
|= tsync_tx_ctl
;
6366 wr32(E1000_TSYNCTXCTL
, regval
);
6368 /* enable/disable RX */
6369 regval
= rd32(E1000_TSYNCRXCTL
);
6370 regval
&= ~(E1000_TSYNCRXCTL_ENABLED
| E1000_TSYNCRXCTL_TYPE_MASK
);
6371 regval
|= tsync_rx_ctl
;
6372 wr32(E1000_TSYNCRXCTL
, regval
);
6374 /* define which PTP packets are time stamped */
6375 wr32(E1000_TSYNCRXCFG
, tsync_rx_cfg
);
6377 /* define ethertype filter for timestamped packets */
6380 (E1000_ETQF_FILTER_ENABLE
| /* enable filter */
6381 E1000_ETQF_1588
| /* enable timestamping */
6382 ETH_P_1588
)); /* 1588 eth protocol type */
6384 wr32(E1000_ETQF(3), 0);
6386 #define PTP_PORT 319
6387 /* L4 Queue Filter[3]: filter by destination port and protocol */
6389 u32 ftqf
= (IPPROTO_UDP
/* UDP */
6390 | E1000_FTQF_VF_BP
/* VF not compared */
6391 | E1000_FTQF_1588_TIME_STAMP
/* Enable Timestamping */
6392 | E1000_FTQF_MASK
); /* mask all inputs */
6393 ftqf
&= ~E1000_FTQF_MASK_PROTO_BP
; /* enable protocol check */
6395 wr32(E1000_IMIR(3), htons(PTP_PORT
));
6396 wr32(E1000_IMIREXT(3),
6397 (E1000_IMIREXT_SIZE_BP
| E1000_IMIREXT_CTRL_BP
));
6398 if (hw
->mac
.type
== e1000_82576
) {
6399 /* enable source port check */
6400 wr32(E1000_SPQF(3), htons(PTP_PORT
));
6401 ftqf
&= ~E1000_FTQF_MASK_SOURCE_PORT_BP
;
6403 wr32(E1000_FTQF(3), ftqf
);
6405 wr32(E1000_FTQF(3), E1000_FTQF_MASK
);
6409 adapter
->hwtstamp_config
= config
;
6411 /* clear TX/RX time stamp registers, just to be sure */
6412 regval
= rd32(E1000_TXSTMPH
);
6413 regval
= rd32(E1000_RXSTMPH
);
6415 return copy_to_user(ifr
->ifr_data
, &config
, sizeof(config
)) ?
6425 static int igb_ioctl(struct net_device
*netdev
, struct ifreq
*ifr
, int cmd
)
6431 return igb_mii_ioctl(netdev
, ifr
, cmd
);
6433 return igb_hwtstamp_ioctl(netdev
, ifr
, cmd
);
6439 s32
igb_read_pcie_cap_reg(struct e1000_hw
*hw
, u32 reg
, u16
*value
)
6441 struct igb_adapter
*adapter
= hw
->back
;
6444 cap_offset
= adapter
->pdev
->pcie_cap
;
6446 return -E1000_ERR_CONFIG
;
6448 pci_read_config_word(adapter
->pdev
, cap_offset
+ reg
, value
);
6453 s32
igb_write_pcie_cap_reg(struct e1000_hw
*hw
, u32 reg
, u16
*value
)
6455 struct igb_adapter
*adapter
= hw
->back
;
6458 cap_offset
= adapter
->pdev
->pcie_cap
;
6460 return -E1000_ERR_CONFIG
;
6462 pci_write_config_word(adapter
->pdev
, cap_offset
+ reg
, *value
);
6467 static void igb_vlan_mode(struct net_device
*netdev
, netdev_features_t features
)
6469 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6470 struct e1000_hw
*hw
= &adapter
->hw
;
6472 bool enable
= !!(features
& NETIF_F_HW_VLAN_RX
);
6475 /* enable VLAN tag insert/strip */
6476 ctrl
= rd32(E1000_CTRL
);
6477 ctrl
|= E1000_CTRL_VME
;
6478 wr32(E1000_CTRL
, ctrl
);
6480 /* Disable CFI check */
6481 rctl
= rd32(E1000_RCTL
);
6482 rctl
&= ~E1000_RCTL_CFIEN
;
6483 wr32(E1000_RCTL
, rctl
);
6485 /* disable VLAN tag insert/strip */
6486 ctrl
= rd32(E1000_CTRL
);
6487 ctrl
&= ~E1000_CTRL_VME
;
6488 wr32(E1000_CTRL
, ctrl
);
6491 igb_rlpml_set(adapter
);
6494 static int igb_vlan_rx_add_vid(struct net_device
*netdev
, u16 vid
)
6496 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6497 struct e1000_hw
*hw
= &adapter
->hw
;
6498 int pf_id
= adapter
->vfs_allocated_count
;
6500 /* attempt to add filter to vlvf array */
6501 igb_vlvf_set(adapter
, vid
, true, pf_id
);
6503 /* add the filter since PF can receive vlans w/o entry in vlvf */
6504 igb_vfta_set(hw
, vid
, true);
6506 set_bit(vid
, adapter
->active_vlans
);
6511 static int igb_vlan_rx_kill_vid(struct net_device
*netdev
, u16 vid
)
6513 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6514 struct e1000_hw
*hw
= &adapter
->hw
;
6515 int pf_id
= adapter
->vfs_allocated_count
;
6518 /* remove vlan from VLVF table array */
6519 err
= igb_vlvf_set(adapter
, vid
, false, pf_id
);
6521 /* if vid was not present in VLVF just remove it from table */
6523 igb_vfta_set(hw
, vid
, false);
6525 clear_bit(vid
, adapter
->active_vlans
);
6530 static void igb_restore_vlan(struct igb_adapter
*adapter
)
6534 igb_vlan_mode(adapter
->netdev
, adapter
->netdev
->features
);
6536 for_each_set_bit(vid
, adapter
->active_vlans
, VLAN_N_VID
)
6537 igb_vlan_rx_add_vid(adapter
->netdev
, vid
);
6540 int igb_set_spd_dplx(struct igb_adapter
*adapter
, u32 spd
, u8 dplx
)
6542 struct pci_dev
*pdev
= adapter
->pdev
;
6543 struct e1000_mac_info
*mac
= &adapter
->hw
.mac
;
6547 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6548 * for the switch() below to work */
6549 if ((spd
& 1) || (dplx
& ~1))
6552 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6553 if ((adapter
->hw
.phy
.media_type
== e1000_media_type_internal_serdes
) &&
6554 spd
!= SPEED_1000
&&
6555 dplx
!= DUPLEX_FULL
)
6558 switch (spd
+ dplx
) {
6559 case SPEED_10
+ DUPLEX_HALF
:
6560 mac
->forced_speed_duplex
= ADVERTISE_10_HALF
;
6562 case SPEED_10
+ DUPLEX_FULL
:
6563 mac
->forced_speed_duplex
= ADVERTISE_10_FULL
;
6565 case SPEED_100
+ DUPLEX_HALF
:
6566 mac
->forced_speed_duplex
= ADVERTISE_100_HALF
;
6568 case SPEED_100
+ DUPLEX_FULL
:
6569 mac
->forced_speed_duplex
= ADVERTISE_100_FULL
;
6571 case SPEED_1000
+ DUPLEX_FULL
:
6573 adapter
->hw
.phy
.autoneg_advertised
= ADVERTISE_1000_FULL
;
6575 case SPEED_1000
+ DUPLEX_HALF
: /* not supported */
6582 dev_err(&pdev
->dev
, "Unsupported Speed/Duplex configuration\n");
6586 static int __igb_shutdown(struct pci_dev
*pdev
, bool *enable_wake
)
6588 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6589 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6590 struct e1000_hw
*hw
= &adapter
->hw
;
6591 u32 ctrl
, rctl
, status
;
6592 u32 wufc
= adapter
->wol
;
6597 netif_device_detach(netdev
);
6599 if (netif_running(netdev
))
6602 igb_clear_interrupt_scheme(adapter
);
6605 retval
= pci_save_state(pdev
);
6610 status
= rd32(E1000_STATUS
);
6611 if (status
& E1000_STATUS_LU
)
6612 wufc
&= ~E1000_WUFC_LNKC
;
6615 igb_setup_rctl(adapter
);
6616 igb_set_rx_mode(netdev
);
6618 /* turn on all-multi mode if wake on multicast is enabled */
6619 if (wufc
& E1000_WUFC_MC
) {
6620 rctl
= rd32(E1000_RCTL
);
6621 rctl
|= E1000_RCTL_MPE
;
6622 wr32(E1000_RCTL
, rctl
);
6625 ctrl
= rd32(E1000_CTRL
);
6626 /* advertise wake from D3Cold */
6627 #define E1000_CTRL_ADVD3WUC 0x00100000
6628 /* phy power management enable */
6629 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6630 ctrl
|= E1000_CTRL_ADVD3WUC
;
6631 wr32(E1000_CTRL
, ctrl
);
6633 /* Allow time for pending master requests to run */
6634 igb_disable_pcie_master(hw
);
6636 wr32(E1000_WUC
, E1000_WUC_PME_EN
);
6637 wr32(E1000_WUFC
, wufc
);
6640 wr32(E1000_WUFC
, 0);
6643 *enable_wake
= wufc
|| adapter
->en_mng_pt
;
6645 igb_power_down_link(adapter
);
6647 igb_power_up_link(adapter
);
6649 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6650 * would have already happened in close and is redundant. */
6651 igb_release_hw_control(adapter
);
6653 pci_disable_device(pdev
);
6659 static int igb_suspend(struct pci_dev
*pdev
, pm_message_t state
)
6664 retval
= __igb_shutdown(pdev
, &wake
);
6669 pci_prepare_to_sleep(pdev
);
6671 pci_wake_from_d3(pdev
, false);
6672 pci_set_power_state(pdev
, PCI_D3hot
);
6678 static int igb_resume(struct pci_dev
*pdev
)
6680 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6681 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6682 struct e1000_hw
*hw
= &adapter
->hw
;
6685 pci_set_power_state(pdev
, PCI_D0
);
6686 pci_restore_state(pdev
);
6687 pci_save_state(pdev
);
6689 err
= pci_enable_device_mem(pdev
);
6692 "igb: Cannot enable PCI device from suspend\n");
6695 pci_set_master(pdev
);
6697 pci_enable_wake(pdev
, PCI_D3hot
, 0);
6698 pci_enable_wake(pdev
, PCI_D3cold
, 0);
6700 if (igb_init_interrupt_scheme(adapter
)) {
6701 dev_err(&pdev
->dev
, "Unable to allocate memory for queues\n");
6707 /* let the f/w know that the h/w is now under the control of the
6709 igb_get_hw_control(adapter
);
6711 wr32(E1000_WUS
, ~0);
6713 if (netif_running(netdev
)) {
6714 err
= igb_open(netdev
);
6719 netif_device_attach(netdev
);
6725 static void igb_shutdown(struct pci_dev
*pdev
)
6729 __igb_shutdown(pdev
, &wake
);
6731 if (system_state
== SYSTEM_POWER_OFF
) {
6732 pci_wake_from_d3(pdev
, wake
);
6733 pci_set_power_state(pdev
, PCI_D3hot
);
6737 #ifdef CONFIG_NET_POLL_CONTROLLER
6739 * Polling 'interrupt' - used by things like netconsole to send skbs
6740 * without having to re-enable interrupts. It's not called while
6741 * the interrupt routine is executing.
6743 static void igb_netpoll(struct net_device
*netdev
)
6745 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6746 struct e1000_hw
*hw
= &adapter
->hw
;
6747 struct igb_q_vector
*q_vector
;
6750 for (i
= 0; i
< adapter
->num_q_vectors
; i
++) {
6751 q_vector
= adapter
->q_vector
[i
];
6752 if (adapter
->msix_entries
)
6753 wr32(E1000_EIMC
, q_vector
->eims_value
);
6755 igb_irq_disable(adapter
);
6756 napi_schedule(&q_vector
->napi
);
6759 #endif /* CONFIG_NET_POLL_CONTROLLER */
6762 * igb_io_error_detected - called when PCI error is detected
6763 * @pdev: Pointer to PCI device
6764 * @state: The current pci connection state
6766 * This function is called after a PCI bus error affecting
6767 * this device has been detected.
6769 static pci_ers_result_t
igb_io_error_detected(struct pci_dev
*pdev
,
6770 pci_channel_state_t state
)
6772 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6773 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6775 netif_device_detach(netdev
);
6777 if (state
== pci_channel_io_perm_failure
)
6778 return PCI_ERS_RESULT_DISCONNECT
;
6780 if (netif_running(netdev
))
6782 pci_disable_device(pdev
);
6784 /* Request a slot slot reset. */
6785 return PCI_ERS_RESULT_NEED_RESET
;
6789 * igb_io_slot_reset - called after the pci bus has been reset.
6790 * @pdev: Pointer to PCI device
6792 * Restart the card from scratch, as if from a cold-boot. Implementation
6793 * resembles the first-half of the igb_resume routine.
6795 static pci_ers_result_t
igb_io_slot_reset(struct pci_dev
*pdev
)
6797 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6798 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6799 struct e1000_hw
*hw
= &adapter
->hw
;
6800 pci_ers_result_t result
;
6803 if (pci_enable_device_mem(pdev
)) {
6805 "Cannot re-enable PCI device after reset.\n");
6806 result
= PCI_ERS_RESULT_DISCONNECT
;
6808 pci_set_master(pdev
);
6809 pci_restore_state(pdev
);
6810 pci_save_state(pdev
);
6812 pci_enable_wake(pdev
, PCI_D3hot
, 0);
6813 pci_enable_wake(pdev
, PCI_D3cold
, 0);
6816 wr32(E1000_WUS
, ~0);
6817 result
= PCI_ERS_RESULT_RECOVERED
;
6820 err
= pci_cleanup_aer_uncorrect_error_status(pdev
);
6822 dev_err(&pdev
->dev
, "pci_cleanup_aer_uncorrect_error_status "
6823 "failed 0x%0x\n", err
);
6824 /* non-fatal, continue */
6831 * igb_io_resume - called when traffic can start flowing again.
6832 * @pdev: Pointer to PCI device
6834 * This callback is called when the error recovery driver tells us that
6835 * its OK to resume normal operation. Implementation resembles the
6836 * second-half of the igb_resume routine.
6838 static void igb_io_resume(struct pci_dev
*pdev
)
6840 struct net_device
*netdev
= pci_get_drvdata(pdev
);
6841 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6843 if (netif_running(netdev
)) {
6844 if (igb_up(adapter
)) {
6845 dev_err(&pdev
->dev
, "igb_up failed after reset\n");
6850 netif_device_attach(netdev
);
6852 /* let the f/w know that the h/w is now under the control of the
6854 igb_get_hw_control(adapter
);
6857 static void igb_rar_set_qsel(struct igb_adapter
*adapter
, u8
*addr
, u32 index
,
6860 u32 rar_low
, rar_high
;
6861 struct e1000_hw
*hw
= &adapter
->hw
;
6863 /* HW expects these in little endian so we reverse the byte order
6864 * from network order (big endian) to little endian
6866 rar_low
= ((u32
) addr
[0] | ((u32
) addr
[1] << 8) |
6867 ((u32
) addr
[2] << 16) | ((u32
) addr
[3] << 24));
6868 rar_high
= ((u32
) addr
[4] | ((u32
) addr
[5] << 8));
6870 /* Indicate to hardware the Address is Valid. */
6871 rar_high
|= E1000_RAH_AV
;
6873 if (hw
->mac
.type
== e1000_82575
)
6874 rar_high
|= E1000_RAH_POOL_1
* qsel
;
6876 rar_high
|= E1000_RAH_POOL_1
<< qsel
;
6878 wr32(E1000_RAL(index
), rar_low
);
6880 wr32(E1000_RAH(index
), rar_high
);
6884 static int igb_set_vf_mac(struct igb_adapter
*adapter
,
6885 int vf
, unsigned char *mac_addr
)
6887 struct e1000_hw
*hw
= &adapter
->hw
;
6888 /* VF MAC addresses start at end of receive addresses and moves
6889 * torwards the first, as a result a collision should not be possible */
6890 int rar_entry
= hw
->mac
.rar_entry_count
- (vf
+ 1);
6892 memcpy(adapter
->vf_data
[vf
].vf_mac_addresses
, mac_addr
, ETH_ALEN
);
6894 igb_rar_set_qsel(adapter
, mac_addr
, rar_entry
, vf
);
6899 static int igb_ndo_set_vf_mac(struct net_device
*netdev
, int vf
, u8
*mac
)
6901 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6902 if (!is_valid_ether_addr(mac
) || (vf
>= adapter
->vfs_allocated_count
))
6904 adapter
->vf_data
[vf
].flags
|= IGB_VF_FLAG_PF_SET_MAC
;
6905 dev_info(&adapter
->pdev
->dev
, "setting MAC %pM on VF %d\n", mac
, vf
);
6906 dev_info(&adapter
->pdev
->dev
, "Reload the VF driver to make this"
6907 " change effective.");
6908 if (test_bit(__IGB_DOWN
, &adapter
->state
)) {
6909 dev_warn(&adapter
->pdev
->dev
, "The VF MAC address has been set,"
6910 " but the PF device is not up.\n");
6911 dev_warn(&adapter
->pdev
->dev
, "Bring the PF device up before"
6912 " attempting to use the VF device.\n");
6914 return igb_set_vf_mac(adapter
, vf
, mac
);
6917 static int igb_link_mbps(int internal_link_speed
)
6919 switch (internal_link_speed
) {
6929 static void igb_set_vf_rate_limit(struct e1000_hw
*hw
, int vf
, int tx_rate
,
6936 /* Calculate the rate factor values to set */
6937 rf_int
= link_speed
/ tx_rate
;
6938 rf_dec
= (link_speed
- (rf_int
* tx_rate
));
6939 rf_dec
= (rf_dec
* (1<<E1000_RTTBCNRC_RF_INT_SHIFT
)) / tx_rate
;
6941 bcnrc_val
= E1000_RTTBCNRC_RS_ENA
;
6942 bcnrc_val
|= ((rf_int
<<E1000_RTTBCNRC_RF_INT_SHIFT
) &
6943 E1000_RTTBCNRC_RF_INT_MASK
);
6944 bcnrc_val
|= (rf_dec
& E1000_RTTBCNRC_RF_DEC_MASK
);
6949 wr32(E1000_RTTDQSEL
, vf
); /* vf X uses queue X */
6950 wr32(E1000_RTTBCNRC
, bcnrc_val
);
6953 static void igb_check_vf_rate_limit(struct igb_adapter
*adapter
)
6955 int actual_link_speed
, i
;
6956 bool reset_rate
= false;
6958 /* VF TX rate limit was not set or not supported */
6959 if ((adapter
->vf_rate_link_speed
== 0) ||
6960 (adapter
->hw
.mac
.type
!= e1000_82576
))
6963 actual_link_speed
= igb_link_mbps(adapter
->link_speed
);
6964 if (actual_link_speed
!= adapter
->vf_rate_link_speed
) {
6966 adapter
->vf_rate_link_speed
= 0;
6967 dev_info(&adapter
->pdev
->dev
,
6968 "Link speed has been changed. VF Transmit "
6969 "rate is disabled\n");
6972 for (i
= 0; i
< adapter
->vfs_allocated_count
; i
++) {
6974 adapter
->vf_data
[i
].tx_rate
= 0;
6976 igb_set_vf_rate_limit(&adapter
->hw
, i
,
6977 adapter
->vf_data
[i
].tx_rate
,
6982 static int igb_ndo_set_vf_bw(struct net_device
*netdev
, int vf
, int tx_rate
)
6984 struct igb_adapter
*adapter
= netdev_priv(netdev
);
6985 struct e1000_hw
*hw
= &adapter
->hw
;
6986 int actual_link_speed
;
6988 if (hw
->mac
.type
!= e1000_82576
)
6991 actual_link_speed
= igb_link_mbps(adapter
->link_speed
);
6992 if ((vf
>= adapter
->vfs_allocated_count
) ||
6993 (!(rd32(E1000_STATUS
) & E1000_STATUS_LU
)) ||
6994 (tx_rate
< 0) || (tx_rate
> actual_link_speed
))
6997 adapter
->vf_rate_link_speed
= actual_link_speed
;
6998 adapter
->vf_data
[vf
].tx_rate
= (u16
)tx_rate
;
6999 igb_set_vf_rate_limit(hw
, vf
, tx_rate
, actual_link_speed
);
7004 static int igb_ndo_get_vf_config(struct net_device
*netdev
,
7005 int vf
, struct ifla_vf_info
*ivi
)
7007 struct igb_adapter
*adapter
= netdev_priv(netdev
);
7008 if (vf
>= adapter
->vfs_allocated_count
)
7011 memcpy(&ivi
->mac
, adapter
->vf_data
[vf
].vf_mac_addresses
, ETH_ALEN
);
7012 ivi
->tx_rate
= adapter
->vf_data
[vf
].tx_rate
;
7013 ivi
->vlan
= adapter
->vf_data
[vf
].pf_vlan
;
7014 ivi
->qos
= adapter
->vf_data
[vf
].pf_qos
;
7018 static void igb_vmm_control(struct igb_adapter
*adapter
)
7020 struct e1000_hw
*hw
= &adapter
->hw
;
7023 switch (hw
->mac
.type
) {
7026 /* replication is not supported for 82575 */
7029 /* notify HW that the MAC is adding vlan tags */
7030 reg
= rd32(E1000_DTXCTL
);
7031 reg
|= E1000_DTXCTL_VLAN_ADDED
;
7032 wr32(E1000_DTXCTL
, reg
);
7034 /* enable replication vlan tag stripping */
7035 reg
= rd32(E1000_RPLOLR
);
7036 reg
|= E1000_RPLOLR_STRVLAN
;
7037 wr32(E1000_RPLOLR
, reg
);
7039 /* none of the above registers are supported by i350 */
7043 if (adapter
->vfs_allocated_count
) {
7044 igb_vmdq_set_loopback_pf(hw
, true);
7045 igb_vmdq_set_replication_pf(hw
, true);
7046 igb_vmdq_set_anti_spoofing_pf(hw
, true,
7047 adapter
->vfs_allocated_count
);
7049 igb_vmdq_set_loopback_pf(hw
, false);
7050 igb_vmdq_set_replication_pf(hw
, false);
7054 static void igb_init_dmac(struct igb_adapter
*adapter
, u32 pba
)
7056 struct e1000_hw
*hw
= &adapter
->hw
;
7060 if (hw
->mac
.type
> e1000_82580
) {
7061 if (adapter
->flags
& IGB_FLAG_DMAC
) {
7064 /* force threshold to 0. */
7065 wr32(E1000_DMCTXTH
, 0);
7068 * DMA Coalescing high water mark needs to be greater
7069 * than the Rx threshold. Set hwm to PBA - max frame
7070 * size in 16B units, capping it at PBA - 6KB.
7072 hwm
= 64 * pba
- adapter
->max_frame_size
/ 16;
7073 if (hwm
< 64 * (pba
- 6))
7074 hwm
= 64 * (pba
- 6);
7075 reg
= rd32(E1000_FCRTC
);
7076 reg
&= ~E1000_FCRTC_RTH_COAL_MASK
;
7077 reg
|= ((hwm
<< E1000_FCRTC_RTH_COAL_SHIFT
)
7078 & E1000_FCRTC_RTH_COAL_MASK
);
7079 wr32(E1000_FCRTC
, reg
);
7082 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7083 * frame size, capping it at PBA - 10KB.
7085 dmac_thr
= pba
- adapter
->max_frame_size
/ 512;
7086 if (dmac_thr
< pba
- 10)
7087 dmac_thr
= pba
- 10;
7088 reg
= rd32(E1000_DMACR
);
7089 reg
&= ~E1000_DMACR_DMACTHR_MASK
;
7090 reg
|= ((dmac_thr
<< E1000_DMACR_DMACTHR_SHIFT
)
7091 & E1000_DMACR_DMACTHR_MASK
);
7093 /* transition to L0x or L1 if available..*/
7094 reg
|= (E1000_DMACR_DMAC_EN
| E1000_DMACR_DMAC_LX_MASK
);
7096 /* watchdog timer= +-1000 usec in 32usec intervals */
7098 wr32(E1000_DMACR
, reg
);
7101 * no lower threshold to disable
7102 * coalescing(smart fifb)-UTRESH=0
7104 wr32(E1000_DMCRTRH
, 0);
7106 reg
= (IGB_DMCTLX_DCFLUSH_DIS
| 0x4);
7108 wr32(E1000_DMCTLX
, reg
);
7111 * free space in tx packet buffer to wake from
7114 wr32(E1000_DMCTXTH
, (IGB_MIN_TXPBSIZE
-
7115 (IGB_TX_BUF_4096
+ adapter
->max_frame_size
)) >> 6);
7118 * make low power state decision controlled
7121 reg
= rd32(E1000_PCIEMISC
);
7122 reg
&= ~E1000_PCIEMISC_LX_DECISION
;
7123 wr32(E1000_PCIEMISC
, reg
);
7124 } /* endif adapter->dmac is not disabled */
7125 } else if (hw
->mac
.type
== e1000_82580
) {
7126 u32 reg
= rd32(E1000_PCIEMISC
);
7127 wr32(E1000_PCIEMISC
, reg
& ~E1000_PCIEMISC_LX_DECISION
);
7128 wr32(E1000_DMACR
, 0);