2 * QEMU Intel 82576 SR/IOV Ethernet Controller Emulation
5 * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eg-gbe-datasheet.pdf
7 * Copyright (c) 2020-2023 Red Hat, Inc.
8 * Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
9 * Developed by Daynix Computing LTD (http://www.daynix.com)
12 * Akihiko Odaki <akihiko.odaki@daynix.com>
13 * Gal Hammer <gal.hammer@sap.com>
14 * Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
15 * Dmitry Fleytman <dmitry@daynix.com>
16 * Leonid Bloch <leonid@daynix.com>
17 * Yan Vugenfirer <yan@daynix.com>
19 * Based on work done by:
20 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
21 * Copyright (c) 2008 Qumranet
22 * Based on work done by:
23 * Copyright (c) 2007 Dan Aloni
24 * Copyright (c) 2004 Antony T Curtis
26 * This library is free software; you can redistribute it and/or
27 * modify it under the terms of the GNU Lesser General Public
28 * License as published by the Free Software Foundation; either
29 * version 2.1 of the License, or (at your option) any later version.
31 * This library is distributed in the hope that it will be useful,
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
34 * Lesser General Public License for more details.
36 * You should have received a copy of the GNU Lesser General Public
37 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
40 #include "qemu/osdep.h"
41 #include "qemu/units.h"
45 #include "qemu/module.h"
46 #include "qemu/range.h"
47 #include "sysemu/sysemu.h"
49 #include "hw/net/mii.h"
50 #include "hw/pci/pci.h"
51 #include "hw/pci/pcie.h"
52 #include "hw/pci/pcie_sriov.h"
53 #include "hw/pci/msi.h"
54 #include "hw/pci/msix.h"
55 #include "hw/qdev-properties.h"
56 #include "migration/vmstate.h"
58 #include "igb_common.h"
62 #include "qapi/error.h"
63 #include "qom/object.h"
/*
 * QOM type name string for this device, followed by the declaration that
 * pairs the IGBState structure with the IGB name. The IGB() cast used by the
 * callbacks in this file presumably comes from this macro - confirm against
 * qom/object.h.
 */
65 #define TYPE_IGB "igb"
66 OBJECT_DECLARE_SIMPLE_TYPE(IGBState
, IGB
)
/*
 * Layout constants consumed by igb_pci_realize().
 *
 * The IGB_* values are passed to pcie_sriov_pf_init(). The E1000E_*_IDX
 * values are the BAR numbers handed to pci_register_bar(), and the
 * E1000E_*_SIZE values size the matching memory regions (MMIO, flash and I/O
 * sizes are visibly used there; the use of E1000E_MSIX_SIZE is not visible
 * here).
 */
84 #define IGB_CAP_SRIOV_OFFSET (0x160)
85 #define IGB_VF_OFFSET (0x80)
86 #define IGB_VF_STRIDE (2)
88 #define E1000E_MMIO_IDX 0
89 #define E1000E_FLASH_IDX 1
90 #define E1000E_IO_IDX 2
91 #define E1000E_MSIX_IDX 3
93 #define E1000E_MMIO_SIZE (128 * KiB)
94 #define E1000E_FLASH_SIZE (128 * KiB)
95 #define E1000E_IO_SIZE (32)
96 #define E1000E_MSIX_SIZE (16 * KiB)
/*
 * PCI config-space write callback; igb_pci_realize() installs it as
 * pci_dev->config_write.
 *
 * Traces the access, forwards it to pci_default_write_config() and to
 * pcie_cap_flr_write_config() (any condition guarding the FLR call is not
 * visible here), then calls igb_start_recv() on the core when the written
 * range covers PCI_COMMAND and PCI_COMMAND_MASTER is set in config space.
 *
 * dev:  device whose config space is written
 * addr: config-space offset of the write
 * val:  value written
 * len:  access length in bytes
 */
98 static void igb_write_config(PCIDevice
*dev
, uint32_t addr
,
99 uint32_t val
, int len
)
101 IGBState
*s
= IGB(dev
);
103 trace_igb_write_config(addr
, val
, len
);
104 pci_default_write_config(dev
, addr
, val
, len
);
106 pcie_cap_flr_write_config(dev
, addr
, val
, len
);
109 if (range_covers_byte(addr
, len
, PCI_COMMAND
) &&
110 (dev
->config
[PCI_COMMAND
] & PCI_COMMAND_MASTER
)) {
111 igb_start_recv(&s
->core
);
/*
 * MMIO read handler (mmio_ops.read). opaque is the IGBState; the result is
 * whatever igb_core_read() returns for the given addr and size on its core.
 */
116 igb_mmio_read(void *opaque
, hwaddr addr
, unsigned size
)
118 IGBState
*s
= opaque
;
119 return igb_core_read(&s
->core
, addr
, size
);
/*
 * MMIO write handler (mmio_ops.write). opaque is the IGBState; addr, val and
 * size are passed unchanged to igb_core_write() on its core.
 */
123 igb_mmio_write(void *opaque
, hwaddr addr
, uint64_t val
, unsigned size
)
125 IGBState
*s
= opaque
;
126 igb_core_write(&s
->core
, addr
, val
, size
);
/*
 * Resets one virtual function. opaque is treated as the IGBState and vfn is
 * handed to igb_core_vf_reset() on its core. Unlike its neighbours this
 * function is not declared static on the visible line.
 */
129 void igb_vf_reset(void *opaque
, uint16_t vfn
)
131 IGBState
*s
= opaque
;
132 igb_core_vf_reset(&s
->core
, vfn
);
/*
 * Classifies s->ioaddr against three ascending limits: 0x1FFFF, 0x7FFFF and
 * 0xFFFFF. The second and third ranges emit the "undefined" and "flash"
 * warning traces, and the trailing statement emits the "unknown" trace.
 *
 * The body of the first branch and all return statements are not visible
 * here. Presumably the first range stores the register index through idx and
 * reports success - confirm. Callers (igb_io_read(), igb_io_write()) use the
 * result as a truth value.
 */
136 igb_io_get_reg_index(IGBState
*s
, uint32_t *idx
)
138 if (s
->ioaddr
< 0x1FFFF) {
143 if (s
->ioaddr
< 0x7FFFF) {
144 trace_e1000e_wrn_io_addr_undefined(s
->ioaddr
);
148 if (s
->ioaddr
< 0xFFFFF) {
149 trace_e1000e_wrn_io_addr_flash(s
->ioaddr
);
153 trace_e1000e_wrn_io_addr_unknown(s
->ioaddr
);
/*
 * Read side of the I/O region (presumably installed as io_ops.read - that
 * initializer is not visible here).
 *
 * Visible behaviour: s->ioaddr (stored by igb_io_write()) is traced; when
 * igb_io_get_reg_index() accepts it, the core register at that index is read
 * with igb_core_read() and the value traced; an unrecognised addr is reported
 * through trace_e1000e_wrn_io_read_unknown(). How addr selects between these
 * paths, and what is returned, is not visible here.
 */
158 igb_io_read(void *opaque
, hwaddr addr
, unsigned size
)
160 IGBState
*s
= opaque
;
166 trace_e1000e_io_read_addr(s
->ioaddr
);
169 if (igb_io_get_reg_index(s
, &idx
)) {
170 val
= igb_core_read(&s
->core
, idx
, sizeof(val
));
171 trace_e1000e_io_read_data(idx
, val
);
176 trace_e1000e_wrn_io_read_unknown(addr
);
/*
 * Write handler of the I/O region (io_ops.write).
 *
 * Visible behaviour:
 *  - val is traced and its low 32 bits are stored in s->ioaddr;
 *  - when igb_io_get_reg_index() accepts the current s->ioaddr, val is traced
 *    and forwarded to igb_core_write() at that index;
 *  - an unrecognised addr is reported through
 *    trace_e1000e_wrn_io_write_unknown().
 * How addr selects between these paths is not visible here.
 *
 * NOTE(review): sizeof(val) is the size of the uint64_t parameter, not the
 * size argument of this access - confirm igb_core_write() tolerates that.
 */
182 igb_io_write(void *opaque
, hwaddr addr
, uint64_t val
, unsigned size
)
184 IGBState
*s
= opaque
;
189 trace_e1000e_io_write_addr(val
);
190 s
->ioaddr
= (uint32_t) val
;
193 if (igb_io_get_reg_index(s
, &idx
)) {
194 trace_e1000e_io_write_data(idx
, val
);
195 igb_core_write(&s
->core
, idx
, val
, sizeof(val
));
199 trace_e1000e_wrn_io_write_unknown(addr
);
/*
 * Access callbacks for the "igb-mmio" region created in igb_pci_realize():
 * little-endian, dispatching to igb_mmio_read()/igb_mmio_write(), with both
 * the minimum and maximum access size set to 4. The sub-structure that holds
 * the two size limits is not visible here.
 */
204 static const MemoryRegionOps mmio_ops
= {
205 .read
= igb_mmio_read
,
206 .write
= igb_mmio_write
,
207 .endianness
= DEVICE_LITTLE_ENDIAN
,
209 .min_access_size
= 4,
210 .max_access_size
= 4,
/*
 * Access callbacks for the "igb-io" region created in igb_pci_realize():
 * little-endian, writes go to igb_io_write(), and both access-size limits are
 * 4. The .read initializer is not visible here; presumably it is
 * igb_io_read() - confirm.
 */
214 static const MemoryRegionOps io_ops
= {
216 .write
= igb_io_write
,
217 .endianness
= DEVICE_LITTLE_ENDIAN
,
219 .min_access_size
= 4,
220 .max_access_size
= 4,
/*
 * net_igb_info.can_receive callback: recovers the IGBState with
 * qemu_get_nic_opaque() and returns igb_can_receive() for its core.
 */
225 igb_nc_can_receive(NetClientState
*nc
)
227 IGBState
*s
= qemu_get_nic_opaque(nc
);
228 return igb_can_receive(&s
->core
);
/*
 * net_igb_info.receive_iov callback: recovers the IGBState with
 * qemu_get_nic_opaque() and returns igb_receive_iov() for its core, passing
 * the iovec array and its element count through unchanged.
 */
232 igb_nc_receive_iov(NetClientState
*nc
, const struct iovec
*iov
, int iovcnt
)
234 IGBState
*s
= qemu_get_nic_opaque(nc
);
235 return igb_receive_iov(&s
->core
, iov
, iovcnt
);
/*
 * net_igb_info.receive callback: recovers the IGBState with
 * qemu_get_nic_opaque() and returns igb_receive() for its core, passing the
 * buffer and its size through unchanged.
 */
239 igb_nc_receive(NetClientState
*nc
, const uint8_t *buf
, size_t size
)
241 IGBState
*s
= qemu_get_nic_opaque(nc
);
242 return igb_receive(&s
->core
, buf
, size
);
/*
 * net_igb_info.link_status_changed callback: recovers the IGBState with
 * qemu_get_nic_opaque() and calls igb_core_set_link_status() on its core.
 */
246 igb_set_link_status(NetClientState
*nc
)
248 IGBState
*s
= qemu_get_nic_opaque(nc
);
249 igb_core_set_link_status(&s
->core
);
/*
 * Net client description passed to qemu_new_nic() in igb_init_net_peer().
 * It declares a NIC-type client sized as NICState and wires the four igb_*
 * callbacks defined in this file (can_receive, receive, receive_iov and
 * link_status_changed).
 */
252 static NetClientInfo net_igb_info
= {
253 .type
= NET_CLIENT_DRIVER_NIC
,
254 .size
= sizeof(NICState
),
255 .can_receive
= igb_nc_can_receive
,
256 .receive
= igb_nc_receive
,
257 .receive_iov
= igb_nc_receive_iov
,
258 .link_status_changed
= igb_set_link_status
,
262 * EEPROM (NVM) contents documented in section 6.1, table 6-1:
263 * and in 6.10 Software accessed words.
/*
 * EEPROM image template: eight rows of eight 16-bit words, each row preceded
 * by a comment naming its words. igb_pci_realize() passes
 * sizeof(igb_eeprom_template) to igb_core_pci_realize().
 */
265 static const uint16_t igb_eeprom_template
[] = {
266 /* Address |Compat.|OEM sp.| ImRev | OEM sp. */
267 0x0000, 0x0000, 0x0000, 0x0d34, 0xffff, 0x2010, 0xffff, 0xffff,
268 /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */
269 0x1040, 0xffff, 0x002b, 0x0000, 0x8086, 0x10c9, 0x0000, 0x70c3,
270 /* SwPin0| DevID | EESZ |-------|ICtrl3 |PCI-tc | MSIX | APtr */
271 0x0004, 0x10c9, 0x5c00, 0x0000, 0x2880, 0x0014, 0x4a40, 0x0060,
272 /* PCIe Init. Conf 1,2,3 |PCICtrl| LD1,3 |DDevID |DevRev | LD0,2 */
273 0x6cfb, 0xc7b0, 0x0abe, 0x0403, 0x0783, 0x10a6, 0x0001, 0x0602,
274 /* SwPin1| FunC |LAN-PWR|ManHwC |ICtrl3 | IOVct |VDevID |-------*/
275 0x0004, 0x0020, 0x0000, 0x004a, 0x2080, 0x00f5, 0x10ca, 0x0000,
276 /*---------------| LD1,3 | LD0,2 | ROEnd | ROSta | Wdog | VPD */
277 0x0000, 0x0000, 0x4784, 0x4602, 0x0000, 0x0000, 0x1000, 0xffff,
278 /* PCSet0| Ccfg0 |PXEver |IBAcap |PCSet1 | Ccfg1 |iSCVer | ?? */
279 0x0100, 0x4000, 0x131f, 0x4013, 0x0100, 0x4000, 0xffff, 0xffff,
280 /* PCSet2| Ccfg2 |PCSet3 | Ccfg3 | ?? |AltMacP| ?? |CHKSUM */
281 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x00e0, 0xffff, 0x0000,
/*
 * Links the core back to its owner: core.owner receives the address of the
 * parent_obj member of IGBState, and core.owner_nic receives s->nic (the NIC
 * created in igb_init_net_peer()).
 */
286 static void igb_core_realize(IGBState
*s
)
288 s
->core
.owner
= &s
->parent_obj
;
289 s
->core
.owner_nic
= s
->nic
;
/*
 * MSI-X setup. Calls msix_init() for IGB_MSIX_VEC_NUM vectors; E1000E_MSIX_IDX
 * and 0x2000 appear among its arguments, but the full argument list is not
 * visible here. The result is traced through trace_e1000e_msix_init_fail(),
 * and a loop marks vectors 0 .. IGB_MSIX_VEC_NUM - 1 as in use with
 * msix_vector_use(). The branch structure around the trace and the loop is
 * not visible here.
 */
293 igb_init_msix(IGBState
*s
)
297 res
= msix_init(PCI_DEVICE(s
), IGB_MSIX_VEC_NUM
,
301 E1000E_MSIX_IDX
, 0x2000,
305 trace_e1000e_msix_init_fail(res
);
307 for (i
= 0; i
< IGB_MSIX_VEC_NUM
; i
++) {
308 msix_vector_use(PCI_DEVICE(s
), i
);
/*
 * MSI-X teardown: releases every vector with msix_unuse_all_vectors(), then
 * calls msix_uninit(). The same region, s->msix, is passed for both
 * memory-region arguments of msix_uninit().
 */
314 igb_cleanup_msix(IGBState
*s
)
316 msix_unuse_all_vectors(PCI_DEVICE(s
));
317 msix_uninit(PCI_DEVICE(s
), &s
->msix
, &s
->msix
);
/*
 * Creates the NIC and records MAC and queue information in the core.
 *
 *  - s->nic is created by qemu_new_nic() from net_igb_info and s->conf, named
 *    after the object's type name and the device id.
 *  - core.max_queue_num becomes conf.peers.queues - 1, or 0 when the queue
 *    count is 0.
 *  - macaddr is traced, copied into core.permanent_mac (as many bytes as that
 *    field holds) and used for the NIC info string.
 *  - The first loop inspects every subqueue: a missing peer, or one without
 *    vnet header support, is traced as "false". What happens next on that
 *    path is not visible here; presumably the function stops - confirm.
 *  - Otherwise "true" is traced, core.has_vnet is set, and a second loop sets
 *    each peer's vnet header length to sizeof(struct virtio_net_hdr).
 *
 * s:       device state to fill in
 * pci_dev: the same device as a PCIDevice; only used to obtain its
 *          DeviceState
 * macaddr: MAC address bytes to install as the permanent MAC
 */
321 igb_init_net_peer(IGBState
*s
, PCIDevice
*pci_dev
, uint8_t *macaddr
)
323 DeviceState
*dev
= DEVICE(pci_dev
);
327 s
->nic
= qemu_new_nic(&net_igb_info
, &s
->conf
,
328 object_get_typename(OBJECT(s
)), dev
->id
, &dev
->mem_reentrancy_guard
, s
);
330 s
->core
.max_queue_num
= s
->conf
.peers
.queues
? s
->conf
.peers
.queues
- 1 : 0;
332 trace_e1000e_mac_set_permanent(MAC_ARG(macaddr
));
333 memcpy(s
->core
.permanent_mac
, macaddr
, sizeof(s
->core
.permanent_mac
));
335 qemu_format_nic_info_str(qemu_get_queue(s
->nic
), macaddr
);
337 /* Setup virtio headers */
338 for (i
= 0; i
< s
->conf
.peers
.queues
; i
++) {
339 nc
= qemu_get_subqueue(s
->nic
, i
);
340 if (!nc
->peer
|| !qemu_has_vnet_hdr(nc
->peer
)) {
341 trace_e1000e_cfg_support_virtio(false);
346 trace_e1000e_cfg_support_virtio(true);
347 s
->core
.has_vnet
= true;
349 for (i
= 0; i
< s
->conf
.peers
.queues
; i
++) {
350 nc
= qemu_get_subqueue(s
->nic
, i
);
351 qemu_set_vnet_hdr_len(nc
->peer
, sizeof(struct virtio_net_hdr
));
/*
 * Adds a PCI power-management capability (PCI_CAP_ID_PM, PCI_PM_SIZEOF bytes)
 * at config-space offset `offset`.
 *
 * An error from pci_add_capability() is reported with error_report_err().
 * Afterwards a word is written at config + offset + PCI_PM_PMC (the value is
 * not visible here; presumably it derives from pmc). The state, PME-enable
 * and data-select bits of PCI_PM_CTRL are set in wmask, and PME_STATUS is set
 * in w1cmask.
 *
 * The return statements are not visible here. igb_pci_realize() treats a
 * negative result as failure.
 */
356 igb_add_pm_capability(PCIDevice
*pdev
, uint8_t offset
, uint16_t pmc
)
358 Error
*local_err
= NULL
;
359 int ret
= pci_add_capability(pdev
, PCI_CAP_ID_PM
, offset
,
360 PCI_PM_SIZEOF
, &local_err
);
363 error_report_err(local_err
);
367 pci_set_word(pdev
->config
+ offset
+ PCI_PM_PMC
,
371 pci_set_word(pdev
->wmask
+ offset
+ PCI_PM_CTRL
,
372 PCI_PM_CTRL_STATE_MASK
|
373 PCI_PM_CTRL_PME_ENABLE
|
374 PCI_PM_CTRL_DATA_SEL_MASK
);
376 pci_set_word(pdev
->w1cmask
+ offset
+ PCI_PM_CTRL
,
377 PCI_PM_CTRL_PME_STATUS
);
/*
 * PCI realize callback (installed as c->realize in igb_class_init()).
 *
 * Visible sequence:
 *  1. Install igb_write_config() as the config-write hook; set the cache line
 *     size byte to 0x10 and the interrupt pin byte to 1.
 *  2. Create and register four BARs: "igb-mmio" (mmio_ops), a plain
 *     "igb-flash" region, "igb-io" (io_ops, I/O space) and "igb-msix"
 *     (64-bit memory type).
 *  3. Make sure a MAC address is set and keep a pointer to its bytes.
 *  4. Add capabilities: PCIe endpoint at 0xa0, MSI at 0x50, power management
 *     at 0x40, then FLR, AER at 0x100, ARI at 0x150 and SR-IOV at
 *     IGB_CAP_SRIOV_OFFSET, including the two VF BARs.
 *  5. Create the net peer with igb_init_net_peer() and hand the core to
 *     igb_core_pci_realize() together with the EEPROM template size.
 *
 * NOTE(review): pcie_endpoint_cap_init() is called inside assert(), so the
 * call itself would disappear in a build that compiles assertions out -
 * confirm the project never does that.
 * NOTE(review): PM and AER capability failures go to hw_error() even though
 * errp is available for reporting.
 */
382 static void igb_pci_realize(PCIDevice
*pci_dev
, Error
**errp
)
384 IGBState
*s
= IGB(pci_dev
);
388 trace_e1000e_cb_pci_realize();
390 pci_dev
->config_write
= igb_write_config
;
392 pci_dev
->config
[PCI_CACHE_LINE_SIZE
] = 0x10;
393 pci_dev
->config
[PCI_INTERRUPT_PIN
] = 1;
395 /* Define IO/MMIO regions */
396 memory_region_init_io(&s
->mmio
, OBJECT(s
), &mmio_ops
, s
,
397 "igb-mmio", E1000E_MMIO_SIZE
);
398 pci_register_bar(pci_dev
, E1000E_MMIO_IDX
,
399 PCI_BASE_ADDRESS_SPACE_MEMORY
, &s
->mmio
);
402 * We provide a dummy implementation for the flash BAR
403 * for drivers that may theoretically probe for its presence.
405 memory_region_init(&s
->flash
, OBJECT(s
),
406 "igb-flash", E1000E_FLASH_SIZE
);
407 pci_register_bar(pci_dev
, E1000E_FLASH_IDX
,
408 PCI_BASE_ADDRESS_SPACE_MEMORY
, &s
->flash
);
410 memory_region_init_io(&s
->io
, OBJECT(s
), &io_ops
, s
,
411 "igb-io", E1000E_IO_SIZE
);
412 pci_register_bar(pci_dev
, E1000E_IO_IDX
,
413 PCI_BASE_ADDRESS_SPACE_IO
, &s
->io
);
415 memory_region_init(&s
->msix
, OBJECT(s
), "igb-msix",
417 pci_register_bar(pci_dev
, E1000E_MSIX_IDX
,
418 PCI_BASE_ADDRESS_MEM_TYPE_64
, &s
->msix
);
420 /* Create networking backend */
421 qemu_macaddr_default_if_unset(&s
->conf
.macaddr
);
422 macaddr
= s
->conf
.macaddr
.a
;
424 /* Add PCI capabilities in reverse order */
425 assert(pcie_endpoint_cap_init(pci_dev
, 0xa0) > 0);
429 ret
= msi_init(pci_dev
, 0x50, 1, true, true, NULL
);
431 trace_e1000e_msi_init_fail(ret
);
434 if (igb_add_pm_capability(pci_dev
, 0x40, PCI_PM_CAP_DSI
) < 0) {
435 hw_error("Failed to initialize PM capability");
438 /* PCIe extended capabilities (in order) */
440 pcie_cap_flr_init(pci_dev
);
443 if (pcie_aer_init(pci_dev
, 1, 0x100, 0x40, errp
) < 0) {
444 hw_error("Failed to initialize AER capability");
447 pcie_ari_init(pci_dev
, 0x150);
449 pcie_sriov_pf_init(pci_dev
, IGB_CAP_SRIOV_OFFSET
, TYPE_IGBVF
,
450 IGB_82576_VF_DEV_ID
, IGB_MAX_VF_FUNCTIONS
, IGB_MAX_VF_FUNCTIONS
,
451 IGB_VF_OFFSET
, IGB_VF_STRIDE
);
453 pcie_sriov_pf_init_vf_bar(pci_dev
, IGBVF_MMIO_BAR_IDX
,
454 PCI_BASE_ADDRESS_MEM_TYPE_64
| PCI_BASE_ADDRESS_MEM_PREFETCH
,
456 pcie_sriov_pf_init_vf_bar(pci_dev
, IGBVF_MSIX_BAR_IDX
,
457 PCI_BASE_ADDRESS_MEM_TYPE_64
| PCI_BASE_ADDRESS_MEM_PREFETCH
,
460 igb_init_net_peer(s
, pci_dev
, macaddr
);
462 /* Initialize core */
465 igb_core_pci_realize(&s
->core
,
467 sizeof(igb_eeprom_template
),
/*
 * PCI exit callback (installed as c->exit in igb_class_init()).
 *
 * Visible teardown order: trace, igb_core_pci_uninit() on the core,
 * pcie_sriov_pf_exit(), pcie_cap_exit(), then qemu_del_nic() on s->nic.
 * Any further cleanup after that is not visible here.
 */
471 static void igb_pci_uninit(PCIDevice
*pci_dev
)
473 IGBState
*s
= IGB(pci_dev
);
475 trace_e1000e_cb_pci_uninit();
477 igb_core_pci_uninit(&s
->core
);
479 pcie_sriov_pf_exit(pci_dev
);
480 pcie_cap_exit(pci_dev
);
482 qemu_del_nic(s
->nic
);
/*
 * Reset "hold" phase handler (installed as rc->phases.hold in
 * igb_class_init()): traces the call and resets the core with
 * igb_core_reset(). The type argument is not used on the visible lines.
 */
488 static void igb_qdev_reset_hold(Object
*obj
, ResetType type
)
490 IGBState
*s
= IGB(obj
);
492 trace_e1000e_cb_qdev_reset_hold();
494 igb_core_reset(&s
->core
);
/*
 * igb_vmstate.pre_save hook: traces the call and runs igb_core_pre_save() on
 * the core. The return statement is not visible here.
 */
497 static int igb_pre_save(void *opaque
)
499 IGBState
*s
= opaque
;
501 trace_e1000e_cb_pre_save();
503 igb_core_pre_save(&s
->core
);
/*
 * igb_vmstate.post_load hook: traces the call and returns the result of
 * igb_core_post_load() on the core. version_id is not used on the visible
 * lines.
 */
508 static int igb_post_load(void *opaque
, int version_id
)
510 IGBState
*s
= opaque
;
512 trace_e1000e_cb_post_load();
513 return igb_core_post_load(&s
->core
);
/*
 * Migration description "igb-tx-ctx" for one
 * struct e1000_adv_tx_context_desc: its four 32-bit fields, in the order
 * listed. Referenced by igb_vmstate_tx for the ctx array.
 */
516 static const VMStateDescription igb_vmstate_tx_ctx
= {
517 .name
= "igb-tx-ctx",
519 .minimum_version_id
= 1,
520 .fields
= (const VMStateField
[]) {
521 VMSTATE_UINT32(vlan_macip_lens
, struct e1000_adv_tx_context_desc
),
522 VMSTATE_UINT32(seqnum_seed
, struct e1000_adv_tx_context_desc
),
523 VMSTATE_UINT32(type_tucmd_mlhl
, struct e1000_adv_tx_context_desc
),
524 VMSTATE_UINT32(mss_l4len_idx
, struct e1000_adv_tx_context_desc
),
525 VMSTATE_END_OF_LIST()
/*
 * Migration description for one struct igb_tx: a two-element ctx array (each
 * element described by igb_vmstate_tx_ctx), the two 32-bit first_* words and
 * the first / skip_cp flags. The minimum accepted version is 2; the name and
 * version_id lines are not visible here.
 */
529 static const VMStateDescription igb_vmstate_tx
= {
532 .minimum_version_id
= 2,
533 .fields
= (const VMStateField
[]) {
534 VMSTATE_STRUCT_ARRAY(ctx
, struct igb_tx
, 2, 0, igb_vmstate_tx_ctx
,
535 struct e1000_adv_tx_context_desc
),
536 VMSTATE_UINT32(first_cmd_type_len
, struct igb_tx
),
537 VMSTATE_UINT32(first_olinfo_status
, struct igb_tx
),
538 VMSTATE_BOOL(first
, struct igb_tx
),
539 VMSTATE_BOOL(skip_cp
, struct igb_tx
),
540 VMSTATE_END_OF_LIST()
/*
 * Migration description "igb-intr-timer" for one IGBIntrDelayTimer: the timer
 * pointer and its running flag. Used by the VMSTATE_IGB_INTR_DELAY_TIMER*
 * macros defined after it.
 */
544 static const VMStateDescription igb_vmstate_intr_timer
= {
545 .name
= "igb-intr-timer",
547 .minimum_version_id
= 1,
548 .fields
= (const VMStateField
[]) {
549 VMSTATE_TIMER_PTR(timer
, IGBIntrDelayTimer
),
550 VMSTATE_BOOL(running
, IGBIntrDelayTimer
),
551 VMSTATE_END_OF_LIST()
/*
 * Field helpers for IGBIntrDelayTimer members: the first wraps VMSTATE_STRUCT
 * for a single timer field _f of state type _s, the second wraps
 * VMSTATE_STRUCT_ARRAY for an array of _num timers. Both use
 * igb_vmstate_intr_timer and pass 0 as the version argument.
 */
555 #define VMSTATE_IGB_INTR_DELAY_TIMER(_f, _s) \
556 VMSTATE_STRUCT(_f, _s, 0, \
557 igb_vmstate_intr_timer, IGBIntrDelayTimer)
559 #define VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(_f, _s, _num) \
560 VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \
561 igb_vmstate_intr_timer, IGBIntrDelayTimer)
/*
 * Top-level migration description of the device (assigned to dc->vmsd in
 * igb_class_init()), with igb_pre_save()/igb_post_load() as hooks.
 *
 * Fields listed: PCI and MSI-X state of parent_obj, ioaddr, and these core
 * members: rx_desc_len, eeprom, phy, mac, permanent_mac, the eitr timer
 * array, eitr_guest_value, the tx array (via igb_vmstate_tx) and timadj.
 * The name and version_id lines, and the element count of the eitr array,
 * are not visible here.
 */
563 static const VMStateDescription igb_vmstate
= {
566 .minimum_version_id
= 1,
567 .pre_save
= igb_pre_save
,
568 .post_load
= igb_post_load
,
569 .fields
= (const VMStateField
[]) {
570 VMSTATE_PCI_DEVICE(parent_obj
, IGBState
),
571 VMSTATE_MSIX(parent_obj
, IGBState
),
573 VMSTATE_UINT32(ioaddr
, IGBState
),
574 VMSTATE_UINT8(core
.rx_desc_len
, IGBState
),
575 VMSTATE_UINT16_ARRAY(core
.eeprom
, IGBState
, IGB_EEPROM_SIZE
),
576 VMSTATE_UINT16_ARRAY(core
.phy
, IGBState
, MAX_PHY_REG_ADDRESS
+ 1),
577 VMSTATE_UINT32_ARRAY(core
.mac
, IGBState
, E1000E_MAC_SIZE
),
578 VMSTATE_UINT8_ARRAY(core
.permanent_mac
, IGBState
, ETH_ALEN
),
580 VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(core
.eitr
, IGBState
,
583 VMSTATE_UINT32_ARRAY(core
.eitr_guest_value
, IGBState
, IGB_INTR_NUM
),
585 VMSTATE_STRUCT_ARRAY(core
.tx
, IGBState
, IGB_NUM_QUEUES
, 0,
586 igb_vmstate_tx
, struct igb_tx
),
588 VMSTATE_INT64(core
.timadj
, IGBState
),
590 VMSTATE_END_OF_LIST()
/*
 * Device properties registered in igb_class_init(): the NIC property set
 * bound to IGBState.conf, plus the boolean "x-pcie-flr-init" property backed
 * by IGBState.has_flr, with true passed as its default argument.
 */
594 static Property igb_properties
[] = {
595 DEFINE_NIC_PROPERTIES(IGBState
, conf
),
596 DEFINE_PROP_BOOL("x-pcie-flr-init", IGBState
, has_flr
, true),
597 DEFINE_PROP_END_OF_LIST(),
/*
 * Class initializer (igb_info.class_init).
 *
 * PCI class: sets the realize/exit callbacks, Intel vendor id, the 82576
 * device id and the Ethernet network class id.
 * Resettable class: sets igb_qdev_reset_hold() as the hold-phase handler.
 * Device class: sets the description string, the migration description
 * igb_vmstate, the property list igb_properties and the network category bit.
 * The data argument is not used on the visible lines.
 */
600 static void igb_class_init(ObjectClass
*class, void *data
)
602 DeviceClass
*dc
= DEVICE_CLASS(class);
603 ResettableClass
*rc
= RESETTABLE_CLASS(class);
604 PCIDeviceClass
*c
= PCI_DEVICE_CLASS(class);
606 c
->realize
= igb_pci_realize
;
607 c
->exit
= igb_pci_uninit
;
608 c
->vendor_id
= PCI_VENDOR_ID_INTEL
;
609 c
->device_id
= E1000_DEV_ID_82576
;
611 c
->class_id
= PCI_CLASS_NETWORK_ETHERNET
;
613 rc
->phases
.hold
= igb_qdev_reset_hold
;
615 dc
->desc
= "Intel 82576 Gigabit Ethernet Controller";
616 dc
->vmsd
= &igb_vmstate
;
618 device_class_set_props(dc
, igb_properties
);
619 set_bit(DEVICE_CATEGORY_NETWORK
, dc
->categories
);
/*
 * Instance initializer (igb_info.instance_init): registers a "bootindex"
 * property backed by s->conf.bootindex, passing the string
 * "/ethernet-phy@0". The remaining arguments of the call are not visible
 * here.
 */
622 static void igb_instance_init(Object
*obj
)
624 IGBState
*s
= IGB(obj
);
625 device_add_bootindex_property(obj
, &s
->conf
.bootindex
,
626 "bootindex", "/ethernet-phy@0",
/*
 * Type description registered by igb_register_types(): parent
 * TYPE_PCI_DEVICE, instances sized as IGBState, igb_class_init() and
 * igb_instance_init() as hooks, and INTERFACE_PCIE_DEVICE in the interface
 * list. The .name line is not visible here.
 */
630 static const TypeInfo igb_info
= {
632 .parent
= TYPE_PCI_DEVICE
,
633 .instance_size
= sizeof(IGBState
),
634 .class_init
= igb_class_init
,
635 .instance_init
= igb_instance_init
,
636 .interfaces
= (InterfaceInfo
[]) {
637 { INTERFACE_PCIE_DEVICE
},
/*
 * Registers igb_info with type_register_static(). The function itself is
 * handed to type_init() right after its definition.
 */
642 static void igb_register_types(void)
644 type_register_static(&igb_info
);
647 type_init(igb_register_types
)