pcie_sriov: Ensure VF function number does not overflow
[qemu/ar7.git] / hw / net / igb.c
blobb6ca2f1b8aeedc905ea6be455e2d91b47530eefa
1 /*
2 * QEMU Intel 82576 SR/IOV Ethernet Controller Emulation
4 * Datasheet:
5 * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eg-gbe-datasheet.pdf
7 * Copyright (c) 2020-2023 Red Hat, Inc.
8 * Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
9 * Developed by Daynix Computing LTD (http://www.daynix.com)
11 * Authors:
12 * Akihiko Odaki <akihiko.odaki@daynix.com>
13 * Gal Hammmer <gal.hammer@sap.com>
14 * Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
15 * Dmitry Fleytman <dmitry@daynix.com>
16 * Leonid Bloch <leonid@daynix.com>
17 * Yan Vugenfirer <yan@daynix.com>
19 * Based on work done by:
20 * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
21 * Copyright (c) 2008 Qumranet
22 * Based on work done by:
23 * Copyright (c) 2007 Dan Aloni
24 * Copyright (c) 2004 Antony T Curtis
26 * This library is free software; you can redistribute it and/or
27 * modify it under the terms of the GNU Lesser General Public
28 * License as published by the Free Software Foundation; either
29 * version 2.1 of the License, or (at your option) any later version.
31 * This library is distributed in the hope that it will be useful,
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
34 * Lesser General Public License for more details.
36 * You should have received a copy of the GNU Lesser General Public
37 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
40 #include "qemu/osdep.h"
41 #include "qemu/units.h"
42 #include "net/eth.h"
43 #include "net/net.h"
44 #include "net/tap.h"
45 #include "qemu/module.h"
46 #include "qemu/range.h"
47 #include "sysemu/sysemu.h"
48 #include "hw/hw.h"
49 #include "hw/net/mii.h"
50 #include "hw/pci/pci.h"
51 #include "hw/pci/pcie.h"
52 #include "hw/pci/pcie_sriov.h"
53 #include "hw/pci/msi.h"
54 #include "hw/pci/msix.h"
55 #include "hw/qdev-properties.h"
56 #include "migration/vmstate.h"
58 #include "igb_common.h"
59 #include "igb_core.h"
61 #include "trace.h"
62 #include "qapi/error.h"
63 #include "qom/object.h"
#define TYPE_IGB "igb"
OBJECT_DECLARE_SIMPLE_TYPE(IGBState, IGB)

/* Device state of the emulated 82576 physical function (PF). */
struct IGBState {
    PCIDevice parent_obj;
    NICState *nic;          /* backend NIC; one subqueue per peer queue */
    NICConf conf;

    /* Memory regions backing the four PF BARs (see E1000E_*_IDX below). */
    MemoryRegion mmio;
    MemoryRegion flash;     /* dummy region, no flash implementation behind it */
    MemoryRegion io;
    MemoryRegion msix;

    /* Latched IOADDR value for the IOADDR/IODATA indirect register window. */
    uint32_t ioaddr;

    IGBCore core;           /* shared register/datapath core */
    bool has_flr;           /* "x-pcie-flr-init" property: expose PCIe FLR */
};
/* SR-IOV capability placement and VF routing-ID layout. */
#define IGB_CAP_SRIOV_OFFSET (0x160) /* extended config offset of SR-IOV cap */
#define IGB_VF_OFFSET (0x80)         /* devfn of first VF relative to the PF */
#define IGB_VF_STRIDE (2)            /* devfn distance between consecutive VFs */

/* PF BAR indices (shared layout with e1000e). */
#define E1000E_MMIO_IDX 0
#define E1000E_FLASH_IDX 1
#define E1000E_IO_IDX 2
#define E1000E_MSIX_IDX 3

/* PF BAR sizes. */
#define E1000E_MMIO_SIZE (128 * KiB)
#define E1000E_FLASH_SIZE (128 * KiB)
#define E1000E_IO_SIZE (32)
#define E1000E_MSIX_SIZE (16 * KiB)
/*
 * Config-space write hook: apply the default write first, then let the
 * FLR capability react (only if exposed), and kick RX when bus mastering
 * is (re-)enabled so pending receives can make progress.
 */
static void igb_write_config(PCIDevice *dev, uint32_t addr,
                             uint32_t val, int len)
{
    IGBState *s = IGB(dev);

    trace_igb_write_config(addr, val, len);
    pci_default_write_config(dev, addr, val, len);
    if (s->has_flr) {
        pcie_cap_flr_write_config(dev, addr, val, len);
    }

    /* Start receiving as soon as PCI_COMMAND_MASTER becomes set. */
    if (range_covers_byte(addr, len, PCI_COMMAND) &&
        (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
        igb_start_recv(&s->core);
    }
}
115 uint64_t
116 igb_mmio_read(void *opaque, hwaddr addr, unsigned size)
118 IGBState *s = opaque;
119 return igb_core_read(&s->core, addr, size);
122 void
123 igb_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
125 IGBState *s = opaque;
126 igb_core_write(&s->core, addr, val, size);
129 void igb_vf_reset(void *opaque, uint16_t vfn)
131 IGBState *s = opaque;
132 igb_core_vf_reset(&s->core, vfn);
135 static bool
136 igb_io_get_reg_index(IGBState *s, uint32_t *idx)
138 if (s->ioaddr < 0x1FFFF) {
139 *idx = s->ioaddr;
140 return true;
143 if (s->ioaddr < 0x7FFFF) {
144 trace_e1000e_wrn_io_addr_undefined(s->ioaddr);
145 return false;
148 if (s->ioaddr < 0xFFFFF) {
149 trace_e1000e_wrn_io_addr_flash(s->ioaddr);
150 return false;
153 trace_e1000e_wrn_io_addr_unknown(s->ioaddr);
154 return false;
/*
 * IO BAR read handler implementing the two-register IOADDR/IODATA
 * indirect access scheme: reading IOADDR returns the latched address,
 * reading IODATA reads the register currently selected by IOADDR.
 */
static uint64_t
igb_io_read(void *opaque, hwaddr addr, unsigned size)
{
    IGBState *s = opaque;
    uint32_t idx = 0;
    uint64_t val;

    switch (addr) {
    case E1000_IOADDR:
        trace_e1000e_io_read_addr(s->ioaddr);
        return s->ioaddr;
    case E1000_IODATA:
        if (igb_io_get_reg_index(s, &idx)) {
            val = igb_core_read(&s->core, idx, sizeof(val));
            trace_e1000e_io_read_data(idx, val);
            return val;
        }
        /* Invalid IOADDR selections read as zero. */
        return 0;
    default:
        trace_e1000e_wrn_io_read_unknown(addr);
        return 0;
    }
}
/*
 * IO BAR write handler: writing IOADDR latches the target register
 * address, writing IODATA forwards the value to the register selected
 * by the latched IOADDR.  Writes through an invalid IOADDR are dropped.
 */
static void
igb_io_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
{
    IGBState *s = opaque;
    uint32_t idx = 0;

    switch (addr) {
    case E1000_IOADDR:
        trace_e1000e_io_write_addr(val);
        s->ioaddr = (uint32_t) val;
        return;
    case E1000_IODATA:
        if (igb_io_get_reg_index(s, &idx)) {
            trace_e1000e_io_write_data(idx, val);
            igb_core_write(&s->core, idx, val, sizeof(val));
        }
        return;
    default:
        trace_e1000e_wrn_io_write_unknown(addr);
        return;
    }
}
/* MMIO BAR ops; the core models registers as 32-bit accesses only. */
static const MemoryRegionOps mmio_ops = {
    .read = igb_mmio_read,
    .write = igb_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
/* IO BAR ops for the IOADDR/IODATA window; 32-bit accesses only. */
static const MemoryRegionOps io_ops = {
    .read = igb_io_read,
    .write = igb_io_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
224 static bool
225 igb_nc_can_receive(NetClientState *nc)
227 IGBState *s = qemu_get_nic_opaque(nc);
228 return igb_can_receive(&s->core);
231 static ssize_t
232 igb_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
234 IGBState *s = qemu_get_nic_opaque(nc);
235 return igb_receive_iov(&s->core, iov, iovcnt);
238 static ssize_t
239 igb_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size)
241 IGBState *s = qemu_get_nic_opaque(nc);
242 return igb_receive(&s->core, buf, size);
245 static void
246 igb_set_link_status(NetClientState *nc)
248 IGBState *s = qemu_get_nic_opaque(nc);
249 igb_core_set_link_status(&s->core);
/* Network backend callbacks for the PF NIC. */
static NetClientInfo net_igb_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = igb_nc_can_receive,
    .receive = igb_nc_receive,
    .receive_iov = igb_nc_receive_iov,
    .link_status_changed = igb_set_link_status,
};
/*
 * EEPROM (NVM) contents documented in section 6.1, table 6-1:
 * and in 6.10 Software accessed words.
 */
static const uint16_t igb_eeprom_template[] = {
  /*        Address        |Compat.|OEM sp.| ImRev |    OEM sp.     */
    0x0000, 0x0000, 0x0000, 0x0d34, 0xffff, 0x2010, 0xffff, 0xffff,
  /*      PBA      |ICtrl1 | SSID  | SVID  | DevID |-------|ICtrl2 */
    0x1040, 0xffff, 0x002b, 0x0000, 0x8086, 0x10c9, 0x0000, 0x70c3,
  /* SwPin0| DevID | EESZ  |-------|ICtrl3 |PCI-tc | MSIX  | APtr  */
    0x0004, 0x10c9, 0x5c00, 0x0000, 0x2880, 0x0014, 0x4a40, 0x0060,
  /* PCIe Init. Conf 1,2,3 |PCICtrl| LD1,3 |DDevID |DevRev | LD0,2 */
    0x6cfb, 0xc7b0, 0x0abe, 0x0403, 0x0783, 0x10a6, 0x0001, 0x0602,
  /* SwPin1| FunC  |LAN-PWR|ManHwC |ICtrl3 | IOVct |VDevID |-------*/
    0x0004, 0x0020, 0x0000, 0x004a, 0x2080, 0x00f5, 0x10ca, 0x0000,
  /*---------------| LD1,3 | LD0,2 | ROEnd | ROSta | Wdog  | VPD   */
    0x0000, 0x0000, 0x4784, 0x4602, 0x0000, 0x0000, 0x1000, 0xffff,
  /* PCSet0| Ccfg0 |PXEver |IBAcap |PCSet1 | Ccfg1 |iSCVer | ??    */
    0x0100, 0x4000, 0x131f, 0x4013, 0x0100, 0x4000, 0xffff, 0xffff,
  /* PCSet2| Ccfg2 |PCSet3 | Ccfg3 | ??    |AltMacP| ??    |CHKSUM */
    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x00e0, 0xffff, 0x0000,
  /* NC-SIC */
    0x0003,
};
286 static void igb_core_realize(IGBState *s)
288 s->core.owner = &s->parent_obj;
289 s->core.owner_nic = s->nic;
/*
 * Initialize the MSI-X capability: table at offset 0 and PBA at 0x2000
 * of the MSIX BAR, capability at config offset 0x70.  On success, mark
 * all vectors as used so the core can fire any of them.
 */
static void
igb_init_msix(IGBState *s)
{
    int i, res;

    res = msix_init(PCI_DEVICE(s), IGB_MSIX_VEC_NUM,
                    &s->msix,
                    E1000E_MSIX_IDX, 0,
                    &s->msix,
                    E1000E_MSIX_IDX, 0x2000,
                    0x70, NULL);

    if (res < 0) {
        /* Non-fatal: the device falls back to other interrupt delivery. */
        trace_e1000e_msix_init_fail(res);
    } else {
        for (i = 0; i < IGB_MSIX_VEC_NUM; i++) {
            msix_vector_use(PCI_DEVICE(s), i);
        }
    }
}
313 static void
314 igb_cleanup_msix(IGBState *s)
316 msix_unuse_all_vectors(PCI_DEVICE(s));
317 msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix);
/*
 * Create the NIC backend, record the permanent MAC in the core, and
 * enable virtio-net headers only if *every* peer queue supports them
 * (first pass checks, second pass configures — all-or-nothing).
 */
static void
igb_init_net_peer(IGBState *s, PCIDevice *pci_dev, uint8_t *macaddr)
{
    DeviceState *dev = DEVICE(pci_dev);
    NetClientState *nc;
    int i;

    s->nic = qemu_new_nic(&net_igb_info, &s->conf,
        object_get_typename(OBJECT(s)), dev->id, &dev->mem_reentrancy_guard, s);

    /* Queue 0 is implicit, so max_queue_num is peers.queues - 1. */
    s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0;

    trace_e1000e_mac_set_permanent(MAC_ARG(macaddr));
    memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac));

    qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr);

    /* Setup virtio headers */
    for (i = 0; i < s->conf.peers.queues; i++) {
        nc = qemu_get_subqueue(s->nic, i);
        if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) {
            /* Any queue without vnet-hdr support disables it everywhere. */
            trace_e1000e_cfg_support_virtio(false);
            return;
        }
    }

    trace_e1000e_cfg_support_virtio(true);
    s->core.has_vnet = true;

    for (i = 0; i < s->conf.peers.queues; i++) {
        nc = qemu_get_subqueue(s->nic, i);
        qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr));
    }
}
/*
 * Add the PCI Power Management capability at @offset with capability
 * bits @pmc, and set up the writable/write-1-to-clear masks for the
 * PM control register.  Returns the pci_add_capability() result
 * (negative on failure, after reporting the error).
 */
static int
igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
{
    Error *local_err = NULL;
    int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
                                 PCI_PM_SIZEOF, &local_err);

    if (local_err) {
        error_report_err(local_err);
        return ret;
    }

    pci_set_word(pdev->config + offset + PCI_PM_PMC,
                 PCI_PM_CAP_VER_1_1 |
                 pmc);

    /* Guest-writable PM control bits. */
    pci_set_word(pdev->wmask + offset + PCI_PM_CTRL,
                 PCI_PM_CTRL_STATE_MASK |
                 PCI_PM_CTRL_PME_ENABLE |
                 PCI_PM_CTRL_DATA_SEL_MASK);

    /* PME status is write-1-to-clear. */
    pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL,
                 PCI_PM_CTRL_PME_STATUS);

    return ret;
}
/*
 * Realize the PF: register the four BARs, install PCI capabilities
 * (in reverse config-space order) and PCIe extended capabilities
 * (in order), initialize SR-IOV with IGB_MAX_VF_FUNCTIONS VFs, set up
 * the VF BAR templates, create the network backend, and finally
 * realize the shared core with the EEPROM template.
 */
static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
{
    IGBState *s = IGB(pci_dev);
    uint8_t *macaddr;
    int ret;

    trace_e1000e_cb_pci_realize();

    pci_dev->config_write = igb_write_config;

    pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
    pci_dev->config[PCI_INTERRUPT_PIN] = 1;

    /* Define IO/MMIO regions */
    memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s,
                          "igb-mmio", E1000E_MMIO_SIZE);
    pci_register_bar(pci_dev, E1000E_MMIO_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);

    /*
     * We provide a dummy implementation for the flash BAR
     * for drivers that may theoretically probe for its presence.
     */
    memory_region_init(&s->flash, OBJECT(s),
                       "igb-flash", E1000E_FLASH_SIZE);
    pci_register_bar(pci_dev, E1000E_FLASH_IDX,
                     PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash);

    memory_region_init_io(&s->io, OBJECT(s), &io_ops, s,
                          "igb-io", E1000E_IO_SIZE);
    pci_register_bar(pci_dev, E1000E_IO_IDX,
                     PCI_BASE_ADDRESS_SPACE_IO, &s->io);

    memory_region_init(&s->msix, OBJECT(s), "igb-msix",
                       E1000E_MSIX_SIZE);
    pci_register_bar(pci_dev, E1000E_MSIX_IDX,
                     PCI_BASE_ADDRESS_MEM_TYPE_64, &s->msix);

    /* Create networking backend */
    qemu_macaddr_default_if_unset(&s->conf.macaddr);
    macaddr = s->conf.macaddr.a;

    /* Add PCI capabilities in reverse order */
    assert(pcie_endpoint_cap_init(pci_dev, 0xa0) > 0);

    igb_init_msix(s);

    ret = msi_init(pci_dev, 0x50, 1, true, true, NULL);
    if (ret) {
        /* Non-fatal: device still works via MSI-X or INTx. */
        trace_e1000e_msi_init_fail(ret);
    }

    if (igb_add_pm_capability(pci_dev, 0x40, PCI_PM_CAP_DSI) < 0) {
        hw_error("Failed to initialize PM capability");
    }

    /* PCIe extended capabilities (in order) */
    if (s->has_flr) {
        pcie_cap_flr_init(pci_dev);
    }

    if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
        hw_error("Failed to initialize AER capability");
    }

    pcie_ari_init(pci_dev, 0x150);

    /* SR-IOV init can fail (e.g. VF routing IDs overflow); unwind on error. */
    if (!pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET,
                            TYPE_IGBVF, IGB_82576_VF_DEV_ID,
                            IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
                            IGB_VF_OFFSET, IGB_VF_STRIDE,
                            errp)) {
        pcie_cap_exit(pci_dev);
        igb_cleanup_msix(s);
        msi_uninit(pci_dev);
        return;
    }

    pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
        PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
        IGBVF_MMIO_SIZE);
    pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MSIX_BAR_IDX,
        PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
        IGBVF_MSIX_SIZE);

    igb_init_net_peer(s, pci_dev, macaddr);

    /* Initialize core */
    igb_core_realize(s);

    igb_core_pci_realize(&s->core,
                         igb_eeprom_template,
                         sizeof(igb_eeprom_template),
                         macaddr);
}
/* Unrealize the PF, tearing down in reverse order of igb_pci_realize(). */
static void igb_pci_uninit(PCIDevice *pci_dev)
{
    IGBState *s = IGB(pci_dev);

    trace_e1000e_cb_pci_uninit();

    igb_core_pci_uninit(&s->core);

    pcie_sriov_pf_exit(pci_dev);
    pcie_cap_exit(pci_dev);

    qemu_del_nic(s->nic);

    igb_cleanup_msix(s);
    msi_uninit(pci_dev);
}
495 static void igb_qdev_reset_hold(Object *obj, ResetType type)
497 IGBState *s = IGB(obj);
499 trace_e1000e_cb_qdev_reset_hold();
501 igb_core_reset(&s->core);
504 static int igb_pre_save(void *opaque)
506 IGBState *s = opaque;
508 trace_e1000e_cb_pre_save();
510 igb_core_pre_save(&s->core);
512 return 0;
515 static int igb_post_load(void *opaque, int version_id)
517 IGBState *s = opaque;
519 trace_e1000e_cb_post_load();
520 return igb_core_post_load(&s->core);
/* Migration layout of one advanced TX context descriptor. */
static const VMStateDescription igb_vmstate_tx_ctx = {
    .name = "igb-tx-ctx",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
        VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
        VMSTATE_UINT32(type_tucmd_mlhl, struct e1000_adv_tx_context_desc),
        VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
        VMSTATE_END_OF_LIST()
    }
};
/* Migration layout of per-queue TX state (two cached context descs). */
static const VMStateDescription igb_vmstate_tx = {
    .name = "igb-tx",
    .version_id = 2,
    .minimum_version_id = 2,
    .fields = (const VMStateField[]) {
        VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
                             struct e1000_adv_tx_context_desc),
        VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
        VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
        VMSTATE_BOOL(first, struct igb_tx),
        VMSTATE_BOOL(skip_cp, struct igb_tx),
        VMSTATE_END_OF_LIST()
    }
};
/* Migration layout of one interrupt-moderation delay timer. */
static const VMStateDescription igb_vmstate_intr_timer = {
    .name = "igb-intr-timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_TIMER_PTR(timer, IGBIntrDelayTimer),
        VMSTATE_BOOL(running, IGBIntrDelayTimer),
        VMSTATE_END_OF_LIST()
    }
};
/* Convenience wrappers to embed one timer / an array of timers. */
#define VMSTATE_IGB_INTR_DELAY_TIMER(_f, _s)                        \
    VMSTATE_STRUCT(_f, _s, 0,                                       \
                   igb_vmstate_intr_timer, IGBIntrDelayTimer)

#define VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(_f, _s, _num)            \
    VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0,                           \
                         igb_vmstate_intr_timer, IGBIntrDelayTimer)
/* Top-level migration description for the PF device. */
static const VMStateDescription igb_vmstate = {
    .name = "igb",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_save = igb_pre_save,
    .post_load = igb_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_PCI_DEVICE(parent_obj, IGBState),
        VMSTATE_MSIX(parent_obj, IGBState),

        VMSTATE_UINT32(ioaddr, IGBState),
        VMSTATE_UINT8(core.rx_desc_len, IGBState),
        VMSTATE_UINT16_ARRAY(core.eeprom, IGBState, IGB_EEPROM_SIZE),
        VMSTATE_UINT16_ARRAY(core.phy, IGBState, MAX_PHY_REG_ADDRESS + 1),
        VMSTATE_UINT32_ARRAY(core.mac, IGBState, E1000E_MAC_SIZE),
        VMSTATE_UINT8_ARRAY(core.permanent_mac, IGBState, ETH_ALEN),

        VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(core.eitr, IGBState,
                                           IGB_INTR_NUM),

        VMSTATE_UINT32_ARRAY(core.eitr_guest_value, IGBState, IGB_INTR_NUM),

        VMSTATE_STRUCT_ARRAY(core.tx, IGBState, IGB_NUM_QUEUES, 0,
                             igb_vmstate_tx, struct igb_tx),

        VMSTATE_INT64(core.timadj, IGBState),

        VMSTATE_END_OF_LIST()
    }
};
/* qdev properties; "x-pcie-flr-init" gates the FLR capability. */
static Property igb_properties[] = {
    DEFINE_NIC_PROPERTIES(IGBState, conf),
    DEFINE_PROP_BOOL("x-pcie-flr-init", IGBState, has_flr, true),
    DEFINE_PROP_END_OF_LIST(),
};
/* Class init: wire realize/exit, PCI IDs, reset phase, and vmstate. */
static void igb_class_init(ObjectClass *class, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(class);
    ResettableClass *rc = RESETTABLE_CLASS(class);
    PCIDeviceClass *c = PCI_DEVICE_CLASS(class);

    c->realize = igb_pci_realize;
    c->exit = igb_pci_uninit;
    c->vendor_id = PCI_VENDOR_ID_INTEL;
    c->device_id = E1000_DEV_ID_82576;
    c->revision = 1;
    c->class_id = PCI_CLASS_NETWORK_ETHERNET;

    rc->phases.hold = igb_qdev_reset_hold;

    dc->desc = "Intel 82576 Gigabit Ethernet Controller";
    dc->vmsd = &igb_vmstate;

    device_class_set_props(dc, igb_properties);
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
}
629 static void igb_instance_init(Object *obj)
631 IGBState *s = IGB(obj);
632 device_add_bootindex_property(obj, &s->conf.bootindex,
633 "bootindex", "/ethernet-phy@0",
634 DEVICE(obj));
/* QOM type registration data for the PF device. */
static const TypeInfo igb_info = {
    .name = TYPE_IGB,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(IGBState),
    .class_init = igb_class_init,
    .instance_init = igb_instance_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_PCIE_DEVICE },
        { }
    },
};
/* Register the "igb" type with QOM at module-load time. */
static void igb_register_types(void)
{
    type_register_static(&igb_info);
}

type_init(igb_register_types)