vfio/pci: Config window quirks
[qemu/kevin.git] / hw / vfio / pci-quirks.c
blob89e81213dd4e5dc261fd7d247b03f24b78ca1fb3
1 /*
2 * device quirks for PCI devices
4 * Copyright Red Hat, Inc. 2012-2015
6 * Authors:
7 * Alex Williamson <alex.williamson@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
13 #include "pci.h"
14 #include "trace.h"
15 #include "qemu/range.h"
17 #define PCI_ANY_ID (~0)
19 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
20 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
22 PCIDevice *pdev = &vdev->pdev;
24 return (vendor == PCI_ANY_ID ||
25 vendor == pci_get_word(pdev->config + PCI_VENDOR_ID)) &&
26 (device == PCI_ANY_ID ||
27 device == pci_get_word(pdev->config + PCI_DEVICE_ID));
/*
 * List of device ids/vendor ids for which to disable
 * option rom loading. This avoids guest hangs during rom
 * execution, as noticed with the BCM 57810 card, for lack of a
 * better way to handle such issues.
 * The user can still override by specifying a romfile or
 * rombar=1.
 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
 * for an analysis of the 57810 card hang. When adding
 * a new vendor id/device id combination below, please also add
 * your card/environment details and information that could
 * help in debugging to the bug tracking this issue
 */
/* Vendor/device ID pairs consulted by vfio_blacklist_opt_rom(). */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { .vendor = 0x14e4, .device = 0x168e }, /* Broadcom BCM 57810 */
};
50 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
52 int i;
54 for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
55 if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
56 trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
57 romblacklist[i].vendor,
58 romblacklist[i].device);
59 return true;
62 return false;
66 * Device specific region quirks (mostly backdoors to PCI config space)
/*
 * The generic window quirks operate on an address and data register,
 * vfio_generic_window_address_quirk handles the address register and
 * vfio_generic_window_data_quirk handles the data register.  These ops
 * pass reads and writes through to hardware until a value matching the
 * stored address match/mask is written.  When this occurs, the data
 * register accesses emulated PCI config space for the device rather than
 * passing through accesses.  This enables devices where PCI config space
 * is accessible behind a window register to maintain the virtualization
 * provided through vfio.
 */
/* One address pattern recognised by the generic window address quirk. */
typedef struct VFIOConfigWindowMatch {
    uint32_t match; /* compared against the written value with mask bits cleared */
    uint32_t mask;  /* bits of the written value kept as the config offset */
} VFIOConfigWindowMatch;
85 typedef struct VFIOConfigWindowQuirk {
86 struct VFIOPCIDevice *vdev;
88 uint32_t address_val;
90 uint32_t address_offset;
91 uint32_t data_offset;
93 bool window_enabled;
94 uint8_t bar;
96 MemoryRegion *addr_mem;
97 MemoryRegion *data_mem;
99 uint32_t nr_matches;
100 VFIOConfigWindowMatch matches[];
101 } VFIOConfigWindowQuirk;
103 static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
104 hwaddr addr,
105 unsigned size)
107 VFIOConfigWindowQuirk *window = opaque;
108 VFIOPCIDevice *vdev = window->vdev;
110 return vfio_region_read(&vdev->bars[window->bar].region,
111 addr + window->address_offset, size);
114 static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
115 uint64_t data,
116 unsigned size)
118 VFIOConfigWindowQuirk *window = opaque;
119 VFIOPCIDevice *vdev = window->vdev;
120 int i;
122 window->window_enabled = false;
124 vfio_region_write(&vdev->bars[window->bar].region,
125 addr + window->address_offset, data, size);
127 for (i = 0; i < window->nr_matches; i++) {
128 if ((data & ~window->matches[i].mask) == window->matches[i].match) {
129 window->window_enabled = true;
130 window->address_val = data & window->matches[i].mask;
131 trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
132 memory_region_name(window->addr_mem), data);
133 break;
138 static const MemoryRegionOps vfio_generic_window_address_quirk = {
139 .read = vfio_generic_window_quirk_address_read,
140 .write = vfio_generic_window_quirk_address_write,
141 .endianness = DEVICE_LITTLE_ENDIAN,
144 static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
145 hwaddr addr, unsigned size)
147 VFIOConfigWindowQuirk *window = opaque;
148 VFIOPCIDevice *vdev = window->vdev;
149 uint64_t data;
151 /* Always read data reg, discard if window enabled */
152 data = vfio_region_read(&vdev->bars[window->bar].region,
153 addr + window->data_offset, size);
155 if (window->window_enabled) {
156 data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
157 trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
158 memory_region_name(window->data_mem), data);
161 return data;
164 static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
165 uint64_t data, unsigned size)
167 VFIOConfigWindowQuirk *window = opaque;
168 VFIOPCIDevice *vdev = window->vdev;
170 if (window->window_enabled) {
171 vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
172 trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
173 memory_region_name(window->data_mem), data);
174 return;
177 vfio_region_write(&vdev->bars[window->bar].region,
178 addr + window->data_offset, data, size);
181 static const MemoryRegionOps vfio_generic_window_data_quirk = {
182 .read = vfio_generic_window_quirk_data_read,
183 .write = vfio_generic_window_quirk_data_write,
184 .endianness = DEVICE_LITTLE_ENDIAN,
/*
 * Is range1 fully contained within range2?
 *
 * Each range is given as a first offset and a length.  The test avoids
 * computing first + len, which would wrap around in uint64_t for ranges
 * near the top of the address space and produce a false positive.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* range1 must not start before range2 nor be longer than it */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Both subtractions are safe here; compare the slack on each side */
    return first1 - first2 <= len2 - len1;
}
/* True when @mask is non-zero and every bit of @mask is set in @flags. */
static bool vfio_flags_enabled(uint8_t flags, uint8_t mask)
{
    if (mask == 0) {
        return false;
    }

    return (flags & mask) == mask;
}
198 static uint64_t vfio_generic_window_quirk_read(void *opaque,
199 hwaddr addr, unsigned size)
201 VFIOLegacyQuirk *quirk = opaque;
202 VFIOPCIDevice *vdev = quirk->vdev;
203 uint64_t data;
205 if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
206 ranges_overlap(addr, size,
207 quirk->data.data_offset, quirk->data.data_size)) {
208 hwaddr offset = addr - quirk->data.data_offset;
210 if (!vfio_range_contained(addr, size, quirk->data.data_offset,
211 quirk->data.data_size)) {
212 hw_error("%s: window data read not fully contained: %s",
213 __func__, memory_region_name(quirk->mem));
216 data = vfio_pci_read_config(&vdev->pdev,
217 quirk->data.address_val + offset, size);
219 trace_vfio_generic_window_quirk_read(memory_region_name(quirk->mem),
220 vdev->vbasedev.name,
221 quirk->data.bar,
222 addr, size, data);
223 } else {
224 data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
225 addr + quirk->data.base_offset, size);
228 return data;
231 static void vfio_generic_window_quirk_write(void *opaque, hwaddr addr,
232 uint64_t data, unsigned size)
234 VFIOLegacyQuirk *quirk = opaque;
235 VFIOPCIDevice *vdev = quirk->vdev;
237 if (ranges_overlap(addr, size,
238 quirk->data.address_offset, quirk->data.address_size)) {
240 if (addr != quirk->data.address_offset) {
241 hw_error("%s: offset write into address window: %s",
242 __func__, memory_region_name(quirk->mem));
245 if ((data & ~quirk->data.address_mask) == quirk->data.address_match) {
246 quirk->data.flags |= quirk->data.write_flags |
247 quirk->data.read_flags;
248 quirk->data.address_val = data & quirk->data.address_mask;
249 } else {
250 quirk->data.flags &= ~(quirk->data.write_flags |
251 quirk->data.read_flags);
255 if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
256 ranges_overlap(addr, size,
257 quirk->data.data_offset, quirk->data.data_size)) {
258 hwaddr offset = addr - quirk->data.data_offset;
260 if (!vfio_range_contained(addr, size, quirk->data.data_offset,
261 quirk->data.data_size)) {
262 hw_error("%s: window data write not fully contained: %s",
263 __func__, memory_region_name(quirk->mem));
266 vfio_pci_write_config(&vdev->pdev,
267 quirk->data.address_val + offset, data, size);
268 trace_vfio_generic_window_quirk_write(memory_region_name(quirk->mem),
269 vdev->vbasedev.name,
270 quirk->data.bar,
271 addr, data, size);
272 return;
275 vfio_region_write(&vdev->bars[quirk->data.bar].region,
276 addr + quirk->data.base_offset, data, size);
279 static const MemoryRegionOps vfio_generic_window_quirk = {
280 .read = vfio_generic_window_quirk_read,
281 .write = vfio_generic_window_quirk_write,
282 .endianness = DEVICE_LITTLE_ENDIAN,
285 static uint64_t vfio_generic_quirk_read(void *opaque,
286 hwaddr addr, unsigned size)
288 VFIOLegacyQuirk *quirk = opaque;
289 VFIOPCIDevice *vdev = quirk->vdev;
290 hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
291 hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
292 uint64_t data;
294 if (vfio_flags_enabled(quirk->data.flags, quirk->data.read_flags) &&
295 ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
296 if (!vfio_range_contained(addr, size, offset,
297 quirk->data.address_mask + 1)) {
298 hw_error("%s: read not fully contained: %s",
299 __func__, memory_region_name(quirk->mem));
302 data = vfio_pci_read_config(&vdev->pdev, addr - offset, size);
304 trace_vfio_generic_quirk_read(memory_region_name(quirk->mem),
305 vdev->vbasedev.name, quirk->data.bar,
306 addr + base, size, data);
307 } else {
308 data = vfio_region_read(&vdev->bars[quirk->data.bar].region,
309 addr + base, size);
312 return data;
315 static void vfio_generic_quirk_write(void *opaque, hwaddr addr,
316 uint64_t data, unsigned size)
318 VFIOLegacyQuirk *quirk = opaque;
319 VFIOPCIDevice *vdev = quirk->vdev;
320 hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
321 hwaddr offset = quirk->data.address_match & ~TARGET_PAGE_MASK;
323 if (vfio_flags_enabled(quirk->data.flags, quirk->data.write_flags) &&
324 ranges_overlap(addr, size, offset, quirk->data.address_mask + 1)) {
325 if (!vfio_range_contained(addr, size, offset,
326 quirk->data.address_mask + 1)) {
327 hw_error("%s: write not fully contained: %s",
328 __func__, memory_region_name(quirk->mem));
331 vfio_pci_write_config(&vdev->pdev, addr - offset, data, size);
333 trace_vfio_generic_quirk_write(memory_region_name(quirk->mem),
334 vdev->vbasedev.name, quirk->data.bar,
335 addr + base, data, size);
336 } else {
337 vfio_region_write(&vdev->bars[quirk->data.bar].region,
338 addr + base, data, size);
342 static const MemoryRegionOps vfio_generic_quirk = {
343 .read = vfio_generic_quirk_read,
344 .write = vfio_generic_quirk_write,
345 .endianness = DEVICE_LITTLE_ENDIAN,
348 #define PCI_VENDOR_ID_ATI 0x1002
351 * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
352 * through VGA register 0x3c3. On newer cards, the I/O port BAR is always
353 * BAR4 (older cards like the X550 used BAR1, but we don't care to support
354 * those). Note that on bare metal, a read of 0x3c3 doesn't always return the
355 * I/O port BAR address. Originally this was coded to return the virtual BAR
356 * address only if the physical register read returns the actual BAR address,
357 * but users have reported greater success if we return the virtual address
358 * unconditionally.
360 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
361 hwaddr addr, unsigned size)
363 VFIOPCIDevice *vdev = opaque;
364 uint64_t data = vfio_pci_read_config(&vdev->pdev,
365 PCI_BASE_ADDRESS_4 + 1, size);
367 trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
369 return data;
372 static const MemoryRegionOps vfio_ati_3c3_quirk = {
373 .read = vfio_ati_3c3_quirk_read,
374 .endianness = DEVICE_LITTLE_ENDIAN,
377 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
379 VFIOQuirk *quirk;
382 * As long as the BAR is >= 256 bytes it will be aligned such that the
383 * lower byte is always zero. Filter out anything else, if it exists.
385 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
386 !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
387 return;
390 quirk = g_malloc0(sizeof(*quirk));
391 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
392 quirk->nr_mem = 1;
394 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
395 "vfio-ati-3c3-quirk", 1);
396 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
397 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
399 QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
400 quirk, next);
402 trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
406 * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
407 * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access
408 * the MMIO space directly, but a window to this space is provided through
409 * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the
410 * data register. When the address is programmed to a range of 0x4000-0x4fff
411 * PCI configuration space is available. Experimentation seems to indicate
412 * that read-only may be provided by hardware.
414 static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
416 VFIOQuirk *quirk;
417 VFIOConfigWindowQuirk *window;
419 /* This windows doesn't seem to be used except by legacy VGA code */
420 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
421 !vdev->has_vga || nr != 4) {
422 return;
425 quirk = g_malloc0(sizeof(*quirk));
426 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
427 quirk->nr_mem = 2;
428 window = quirk->data = g_malloc0(sizeof(*window) +
429 sizeof(VFIOConfigWindowMatch));
430 window->vdev = vdev;
431 window->address_offset = 0;
432 window->data_offset = 4;
433 window->nr_matches = 1;
434 window->matches[0].match = 0x4000;
435 window->matches[0].mask = PCIE_CONFIG_SPACE_SIZE - 1;
436 window->bar = nr;
437 window->addr_mem = &quirk->mem[0];
438 window->data_mem = &quirk->mem[1];
440 memory_region_init_io(window->addr_mem, OBJECT(vdev),
441 &vfio_generic_window_address_quirk, window,
442 "vfio-ati-bar4-window-address-quirk", 4);
443 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
444 window->address_offset,
445 window->addr_mem, 1);
447 memory_region_init_io(window->data_mem, OBJECT(vdev),
448 &vfio_generic_window_data_quirk, window,
449 "vfio-ati-bar4-window-data-quirk", 4);
450 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
451 window->data_offset,
452 window->data_mem, 1);
454 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
456 trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
460 * Trap the BAR2 MMIO window to config space as well.
462 static void vfio_probe_ati_bar2_4000_quirk(VFIOPCIDevice *vdev, int nr)
464 PCIDevice *pdev = &vdev->pdev;
465 VFIOQuirk *quirk;
466 VFIOLegacyQuirk *legacy;
468 /* Only enable on newer devices where BAR2 is 64bit */
469 if (!vdev->has_vga || nr != 2 || !vdev->bars[2].mem64 ||
470 pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_ATI) {
471 return;
474 quirk = g_malloc0(sizeof(*quirk));
475 quirk->data = legacy = g_malloc0(sizeof(*legacy));
476 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
477 quirk->nr_mem = 1;
478 legacy->vdev = vdev;
479 legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
480 legacy->data.address_match = 0x4000;
481 legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
482 legacy->data.bar = nr;
484 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
485 "vfio-ati-bar2-4000-quirk",
486 TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
487 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
488 legacy->data.address_match & TARGET_PAGE_MASK,
489 quirk->mem, 1);
491 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
493 trace_vfio_probe_ati_bar2_4000_quirk(vdev->vbasedev.name);
497 * Older ATI/AMD cards like the X550 have a similar window to that above.
498 * I/O port BAR1 provides a window to a mirror of PCI config space located
499 * in BAR2 at offset 0xf00. We don't care to support such older cards, but
500 * note it for future reference.
503 #define PCI_VENDOR_ID_NVIDIA 0x10de
/*
 * Nvidia has several different methods to get to config space, the
 * nouveau project has several of these documented here:
 * https://github.com/pathscale/envytools/tree/master/hwdocs
 *
 * The first quirk is actually not documented in envytools and is found
 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]).  This is an
 * NV46 chipset.  The backdoor uses the legacy VGA I/O ports to access
 * the mirror of PCI config space found at BAR0 offset 0x1800.  The access
 * sequence first writes 0x338 to I/O port 0x3d4.  The target offset is
 * then written to 0x3d0.  Finally 0x538 is written for a read and 0x738
 * is written for a write to 0x3d4.  The BAR0 offset is then accessible
 * through 0x3d0.  This quirk doesn't seem to be necessary on newer cards
 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
 */
/* States of the 0x3d4/0x3d0 access sequence, plus names used for tracing. */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE
} VFIONvidia3d0State;

static const char *nv3d0_states[] = {
    [NONE] = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ] = "READ",
    [WRITE] = "WRITE",
};
524 typedef struct VFIONvidia3d0Quirk {
525 VFIOPCIDevice *vdev;
526 VFIONvidia3d0State state;
527 uint32_t offset;
528 } VFIONvidia3d0Quirk;
530 static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
531 hwaddr addr, unsigned size)
533 VFIONvidia3d0Quirk *quirk = opaque;
534 VFIOPCIDevice *vdev = quirk->vdev;
536 quirk->state = NONE;
538 return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
539 addr + 0x14, size);
542 static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
543 uint64_t data, unsigned size)
545 VFIONvidia3d0Quirk *quirk = opaque;
546 VFIOPCIDevice *vdev = quirk->vdev;
547 VFIONvidia3d0State old_state = quirk->state;
549 quirk->state = NONE;
551 switch (data) {
552 case 0x338:
553 if (old_state == NONE) {
554 quirk->state = SELECT;
555 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
556 nv3d0_states[quirk->state]);
558 break;
559 case 0x538:
560 if (old_state == WINDOW) {
561 quirk->state = READ;
562 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
563 nv3d0_states[quirk->state]);
565 break;
566 case 0x738:
567 if (old_state == WINDOW) {
568 quirk->state = WRITE;
569 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
570 nv3d0_states[quirk->state]);
572 break;
575 vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
576 addr + 0x14, data, size);
579 static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
580 .read = vfio_nvidia_3d4_quirk_read,
581 .write = vfio_nvidia_3d4_quirk_write,
582 .endianness = DEVICE_LITTLE_ENDIAN,
585 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
586 hwaddr addr, unsigned size)
588 VFIONvidia3d0Quirk *quirk = opaque;
589 VFIOPCIDevice *vdev = quirk->vdev;
590 VFIONvidia3d0State old_state = quirk->state;
591 uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
592 addr + 0x10, size);
594 quirk->state = NONE;
596 if (old_state == READ &&
597 (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
598 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
600 data = vfio_pci_read_config(&vdev->pdev, offset, size);
601 trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
602 offset, size, data);
605 return data;
608 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
609 uint64_t data, unsigned size)
611 VFIONvidia3d0Quirk *quirk = opaque;
612 VFIOPCIDevice *vdev = quirk->vdev;
613 VFIONvidia3d0State old_state = quirk->state;
615 quirk->state = NONE;
617 if (old_state == SELECT) {
618 quirk->offset = (uint32_t)data;
619 quirk->state = WINDOW;
620 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
621 nv3d0_states[quirk->state]);
622 } else if (old_state == WRITE) {
623 if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
624 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
626 vfio_pci_write_config(&vdev->pdev, offset, data, size);
627 trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
628 offset, data, size);
629 return;
633 vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
634 addr + 0x10, data, size);
637 static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
638 .read = vfio_nvidia_3d0_quirk_read,
639 .write = vfio_nvidia_3d0_quirk_write,
640 .endianness = DEVICE_LITTLE_ENDIAN,
643 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
645 VFIOQuirk *quirk;
646 VFIONvidia3d0Quirk *data;
648 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
649 !vdev->bars[1].region.size) {
650 return;
653 quirk = g_malloc0(sizeof(*quirk));
654 quirk->data = data = g_malloc0(sizeof(*data));
655 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
656 quirk->nr_mem = 2;
657 data->vdev = vdev;
659 memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
660 data, "vfio-nvidia-3d4-quirk", 2);
661 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
662 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
664 memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
665 data, "vfio-nvidia-3d0-quirk", 2);
666 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
667 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
669 QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
670 quirk, next);
672 trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
676 * The second quirk is documented in envytools. The I/O port BAR5 is just
677 * a set of address/data ports to the MMIO BARs. The BAR we care about is
678 * again BAR0. This backdoor is apparently a bit newer than the one above
679 * so we need to not only trap 256 bytes @0x1800, but all of PCI config
680 * space, including extended space is available at the 4k @0x88000.
682 typedef struct VFIONvidiaBAR5Quirk {
683 uint32_t master;
684 uint32_t enable;
685 MemoryRegion *addr_mem;
686 MemoryRegion *data_mem;
687 bool enabled;
688 VFIOConfigWindowQuirk window; /* last for match data */
689 } VFIONvidiaBAR5Quirk;
691 static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
693 VFIOPCIDevice *vdev = bar5->window.vdev;
695 if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
696 return;
699 bar5->enabled = !bar5->enabled;
700 trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
701 bar5->enabled ? "Enable" : "Disable");
702 memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
703 memory_region_set_enabled(bar5->data_mem, bar5->enabled);
706 static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
707 hwaddr addr, unsigned size)
709 VFIONvidiaBAR5Quirk *bar5 = opaque;
710 VFIOPCIDevice *vdev = bar5->window.vdev;
712 return vfio_region_read(&vdev->bars[5].region, addr, size);
715 static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
716 uint64_t data, unsigned size)
718 VFIONvidiaBAR5Quirk *bar5 = opaque;
719 VFIOPCIDevice *vdev = bar5->window.vdev;
721 vfio_region_write(&vdev->bars[5].region, addr, data, size);
723 bar5->master = data;
724 vfio_nvidia_bar5_enable(bar5);
727 static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
728 .read = vfio_nvidia_bar5_quirk_master_read,
729 .write = vfio_nvidia_bar5_quirk_master_write,
730 .endianness = DEVICE_LITTLE_ENDIAN,
733 static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
734 hwaddr addr, unsigned size)
736 VFIONvidiaBAR5Quirk *bar5 = opaque;
737 VFIOPCIDevice *vdev = bar5->window.vdev;
739 return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
742 static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
743 uint64_t data, unsigned size)
745 VFIONvidiaBAR5Quirk *bar5 = opaque;
746 VFIOPCIDevice *vdev = bar5->window.vdev;
748 vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
750 bar5->enable = data;
751 vfio_nvidia_bar5_enable(bar5);
754 static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
755 .read = vfio_nvidia_bar5_quirk_enable_read,
756 .write = vfio_nvidia_bar5_quirk_enable_write,
757 .endianness = DEVICE_LITTLE_ENDIAN,
760 static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
762 VFIOQuirk *quirk;
763 VFIONvidiaBAR5Quirk *bar5;
764 VFIOConfigWindowQuirk *window;
766 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
767 !vdev->has_vga || nr != 5) {
768 return;
771 quirk = g_malloc0(sizeof(*quirk));
772 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 4);
773 quirk->nr_mem = 4;
774 bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
775 (sizeof(VFIOConfigWindowMatch) * 2));
776 window = &bar5->window;
778 window->vdev = vdev;
779 window->address_offset = 0x8;
780 window->data_offset = 0xc;
781 window->nr_matches = 2;
782 window->matches[0].match = 0x1800;
783 window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
784 window->matches[1].match = 0x88000;
785 window->matches[1].mask = PCIE_CONFIG_SPACE_SIZE - 1;
786 window->bar = nr;
787 window->addr_mem = bar5->addr_mem = &quirk->mem[0];
788 window->data_mem = bar5->data_mem = &quirk->mem[1];
790 memory_region_init_io(window->addr_mem, OBJECT(vdev),
791 &vfio_generic_window_address_quirk, window,
792 "vfio-nvidia-bar5-window-address-quirk", 4);
793 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
794 window->address_offset,
795 window->addr_mem, 1);
796 memory_region_set_enabled(window->addr_mem, false);
798 memory_region_init_io(window->data_mem, OBJECT(vdev),
799 &vfio_generic_window_data_quirk, window,
800 "vfio-nvidia-bar5-window-data-quirk", 4);
801 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
802 window->data_offset,
803 window->data_mem, 1);
804 memory_region_set_enabled(window->data_mem, false);
806 memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
807 &vfio_nvidia_bar5_quirk_master, bar5,
808 "vfio-nvidia-bar5-master-quirk", 4);
809 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
810 0, &quirk->mem[2], 1);
812 memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
813 &vfio_nvidia_bar5_quirk_enable, bar5,
814 "vfio-nvidia-bar5-enable-quirk", 4);
815 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
816 4, &quirk->mem[3], 1);
818 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
820 trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
823 static void vfio_nvidia_88000_quirk_write(void *opaque, hwaddr addr,
824 uint64_t data, unsigned size)
826 VFIOLegacyQuirk *quirk = opaque;
827 VFIOPCIDevice *vdev = quirk->vdev;
828 PCIDevice *pdev = &vdev->pdev;
829 hwaddr base = quirk->data.address_match & TARGET_PAGE_MASK;
831 vfio_generic_quirk_write(opaque, addr, data, size);
834 * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
835 * MSI capability ID register. Both the ID and next register are
836 * read-only, so we allow writes covering either of those to real hw.
837 * NB - only fixed for the 0x88000 MMIO window.
839 if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
840 vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
841 vfio_region_write(&vdev->bars[quirk->data.bar].region,
842 addr + base, data, size);
846 static const MemoryRegionOps vfio_nvidia_88000_quirk = {
847 .read = vfio_generic_quirk_read,
848 .write = vfio_nvidia_88000_quirk_write,
849 .endianness = DEVICE_LITTLE_ENDIAN,
853 * Finally, BAR0 itself. We want to redirect any accesses to either
854 * 0x1800 or 0x88000 through the PCI config space access functions.
856 * NB - quirk at a page granularity or else they don't seem to work when
857 * BARs are mmap'd
859 * Here's offset 0x88000...
861 static void vfio_probe_nvidia_bar0_88000_quirk(VFIOPCIDevice *vdev, int nr)
863 PCIDevice *pdev = &vdev->pdev;
864 VFIOQuirk *quirk;
865 VFIOLegacyQuirk *legacy;
866 uint16_t vendor, class;
868 vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
869 class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
871 if (nr != 0 || vendor != PCI_VENDOR_ID_NVIDIA ||
872 class != PCI_CLASS_DISPLAY_VGA) {
873 return;
876 quirk = g_malloc0(sizeof(*quirk));
877 quirk->data = legacy = g_malloc0(sizeof(*legacy));
878 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
879 quirk->nr_mem = 1;
880 legacy->vdev = vdev;
881 legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
882 legacy->data.address_match = 0x88000;
883 legacy->data.address_mask = PCIE_CONFIG_SPACE_SIZE - 1;
884 legacy->data.bar = nr;
886 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_nvidia_88000_quirk,
887 legacy, "vfio-nvidia-bar0-88000-quirk",
888 TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
889 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
890 legacy->data.address_match & TARGET_PAGE_MASK,
891 quirk->mem, 1);
893 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
895 trace_vfio_probe_nvidia_bar0_88000_quirk(vdev->vbasedev.name);
899 * And here's the same for BAR0 offset 0x1800...
901 static void vfio_probe_nvidia_bar0_1800_quirk(VFIOPCIDevice *vdev, int nr)
903 PCIDevice *pdev = &vdev->pdev;
904 VFIOQuirk *quirk;
905 VFIOLegacyQuirk *legacy;
907 if (!vdev->has_vga || nr != 0 ||
908 pci_get_word(pdev->config + PCI_VENDOR_ID) != PCI_VENDOR_ID_NVIDIA) {
909 return;
912 /* Log the chipset ID */
913 trace_vfio_probe_nvidia_bar0_1800_quirk_id(
914 (unsigned int)(vfio_region_read(&vdev->bars[0].region, 0, 4) >> 20)
915 & 0xff);
917 quirk = g_malloc0(sizeof(*quirk));
918 quirk->data = legacy = g_malloc0(sizeof(*legacy));
919 quirk->mem = legacy->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
920 quirk->nr_mem = 1;
921 legacy->vdev = vdev;
922 legacy->data.flags = legacy->data.read_flags = legacy->data.write_flags = 1;
923 legacy->data.address_match = 0x1800;
924 legacy->data.address_mask = PCI_CONFIG_SPACE_SIZE - 1;
925 legacy->data.bar = nr;
927 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_generic_quirk, legacy,
928 "vfio-nvidia-bar0-1800-quirk",
929 TARGET_PAGE_ALIGN(legacy->data.address_mask + 1));
930 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
931 legacy->data.address_match & TARGET_PAGE_MASK,
932 quirk->mem, 1);
934 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
936 trace_vfio_probe_nvidia_bar0_1800_quirk(vdev->vbasedev.name);
940 * TODO - Some Nvidia devices provide config access to their companion HDA
941 * device and even to their parent bridge via these config space mirrors.
942 * Add quirks for those regions.
945 #define PCI_VENDOR_ID_REALTEK 0x10ec
948 * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2
949 * offset 0x70 there is a dword data register, offset 0x74 is a dword address
950 * register. According to the Linux r8169 driver, the MSI-X table is addressed
951 * when the "type" portion of the address register is set to 0x1. This appears
952 * to be bits 16:30. Bit 31 is both a write indicator and some sort of
953 * "address latched" indicator. Bits 12:15 are a mask field, which we can
954 * ignore because the MSI-X table should always be accessed as a dword (full
955 * mask). Bits 0:11 is offset within the type.
957 * Example trace:
959 * Read from MSI-X table offset 0
960 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
961 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
962 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
964 * Write 0xfee00000 to MSI-X table offset 0
965 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
966 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
967 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
969 typedef struct VFIOrtl8168Quirk {
970 VFIOPCIDevice *vdev;
971 uint32_t addr;
972 uint32_t data;
973 bool enabled;
974 } VFIOrtl8168Quirk;
976 static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
977 hwaddr addr, unsigned size)
979 VFIOrtl8168Quirk *rtl = opaque;
980 VFIOPCIDevice *vdev = rtl->vdev;
981 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
983 if (rtl->enabled) {
984 data = rtl->addr ^ 0x80000000U; /* latch/complete */
985 trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
988 return data;
991 static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
992 uint64_t data, unsigned size)
994 VFIOrtl8168Quirk *rtl = opaque;
995 VFIOPCIDevice *vdev = rtl->vdev;
997 rtl->enabled = false;
999 if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
1000 rtl->enabled = true;
1001 rtl->addr = (uint32_t)data;
1003 if (data & 0x80000000U) { /* Do write */
1004 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
1005 hwaddr offset = data & 0xfff;
1006 uint64_t val = rtl->data;
1008 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
1009 (uint16_t)offset, val);
1011 /* Write to the proper guest MSI-X table instead */
1012 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
1013 offset, val, size,
1014 MEMTXATTRS_UNSPECIFIED);
1016 return; /* Do not write guest MSI-X data to hardware */
1020 vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
1023 static const MemoryRegionOps vfio_rtl_address_quirk = {
1024 .read = vfio_rtl8168_quirk_address_read,
1025 .write = vfio_rtl8168_quirk_address_write,
1026 .valid = {
1027 .min_access_size = 4,
1028 .max_access_size = 4,
1029 .unaligned = false,
1031 .endianness = DEVICE_LITTLE_ENDIAN,
1034 static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
1035 hwaddr addr, unsigned size)
1037 VFIOrtl8168Quirk *rtl = opaque;
1038 VFIOPCIDevice *vdev = rtl->vdev;
1039 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
1041 if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
1042 hwaddr offset = rtl->addr & 0xfff;
1043 memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
1044 &data, size, MEMTXATTRS_UNSPECIFIED);
1045 trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
1048 return data;
1051 static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
1052 uint64_t data, unsigned size)
1054 VFIOrtl8168Quirk *rtl = opaque;
1055 VFIOPCIDevice *vdev = rtl->vdev;
1057 rtl->data = (uint32_t)data;
1059 vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
1062 static const MemoryRegionOps vfio_rtl_data_quirk = {
1063 .read = vfio_rtl8168_quirk_data_read,
1064 .write = vfio_rtl8168_quirk_data_write,
1065 .valid = {
1066 .min_access_size = 4,
1067 .max_access_size = 4,
1068 .unaligned = false,
1070 .endianness = DEVICE_LITTLE_ENDIAN,
1073 static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
1075 VFIOQuirk *quirk;
1076 VFIOrtl8168Quirk *rtl;
1078 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
1079 return;
1082 quirk = g_malloc0(sizeof(*quirk));
1083 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
1084 quirk->nr_mem = 2;
1085 quirk->data = rtl = g_malloc0(sizeof(*rtl));
1086 rtl->vdev = vdev;
1088 memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
1089 &vfio_rtl_address_quirk, rtl,
1090 "vfio-rtl8168-window-address-quirk", 4);
1091 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
1092 0x74, &quirk->mem[0], 1);
1094 memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
1095 &vfio_rtl_data_quirk, rtl,
1096 "vfio-rtl8168-window-data-quirk", 4);
1097 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
1098 0x70, &quirk->mem[1], 1);
1100 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
1102 trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
1106 * Common quirk probe entry points.
1108 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
1110 vfio_vga_probe_ati_3c3_quirk(vdev);
1111 vfio_vga_probe_nvidia_3d0_quirk(vdev);
1114 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
1116 VFIOQuirk *quirk;
1117 int i, j;
1119 for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
1120 QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
1121 for (j = 0; j < quirk->nr_mem; j++) {
1122 memory_region_del_subregion(&vdev->vga.region[i].mem,
1123 &quirk->mem[j]);
1129 void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
1131 int i, j;
1133 for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
1134 while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
1135 VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
1136 QLIST_REMOVE(quirk, next);
1137 for (j = 0; j < quirk->nr_mem; j++) {
1138 object_unparent(OBJECT(&quirk->mem[j]));
1140 g_free(quirk->mem);
1141 g_free(quirk->data);
1142 g_free(quirk);
1147 void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1149 vfio_probe_ati_bar4_quirk(vdev, nr);
1150 vfio_probe_ati_bar2_4000_quirk(vdev, nr);
1151 vfio_probe_nvidia_bar5_quirk(vdev, nr);
1152 vfio_probe_nvidia_bar0_88000_quirk(vdev, nr);
1153 vfio_probe_nvidia_bar0_1800_quirk(vdev, nr);
1154 vfio_probe_rtl8168_bar2_quirk(vdev, nr);
1157 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
1159 VFIOBAR *bar = &vdev->bars[nr];
1160 VFIOQuirk *quirk;
1161 int i;
1163 QLIST_FOREACH(quirk, &bar->quirks, next) {
1164 for (i = 0; i < quirk->nr_mem; i++) {
1165 memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
1170 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
1172 VFIOBAR *bar = &vdev->bars[nr];
1173 int i;
1175 while (!QLIST_EMPTY(&bar->quirks)) {
1176 VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1177 QLIST_REMOVE(quirk, next);
1178 for (i = 0; i < quirk->nr_mem; i++) {
1179 object_unparent(OBJECT(&quirk->mem[i]));
1181 g_free(quirk->mem);
1182 g_free(quirk->data);
1183 g_free(quirk);