vfio/pci: Cache vendor and device ID
[qemu/ar7.git] / hw / vfio / pci-quirks.c
blob3717e01a4215d260f9b53a6ef633dc5a4de2d093
1 /*
2 * device quirks for PCI devices
4 * Copyright Red Hat, Inc. 2012-2015
6 * Authors:
7 * Alex Williamson <alex.williamson@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
13 #include "pci.h"
14 #include "trace.h"
15 #include "qemu/range.h"
17 #define PCI_ANY_ID (~0)
19 /* Use uint32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
20 static bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t device)
22 return (vendor == PCI_ANY_ID || vendor == vdev->vendor_id) &&
23 (device == PCI_ANY_ID || device == vdev->device_id);
26 static bool vfio_is_vga(VFIOPCIDevice *vdev)
28 PCIDevice *pdev = &vdev->pdev;
29 uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
31 return class == PCI_CLASS_DISPLAY_VGA;
35 * List of device ids/vendor ids for which to disable
36 * option rom loading. This avoids guest hangs during rom
37 * execution, as noticed with the BCM 57810 card, for lack of a
38 * better way to handle such issues.
39 * The user can still override by specifying a romfile or
40 * rombar=1.
41 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
42 * for an analysis of the 57810 card hang. When adding
43 * a new vendor id/device id combination below, please also add
44 * your card/environment details and information that could
45 * help in debugging to the bug tracking this issue
/* Vendor/device pairs for which option ROM loading is disabled by default. */
static const struct {
    uint32_t vendor;
    uint32_t device;
} romblacklist[] = {
    { .vendor = 0x14e4, .device = 0x168e }, /* Broadcom BCM 57810 */
};
54 bool vfio_blacklist_opt_rom(VFIOPCIDevice *vdev)
56 int i;
58 for (i = 0 ; i < ARRAY_SIZE(romblacklist); i++) {
59 if (vfio_pci_is(vdev, romblacklist[i].vendor, romblacklist[i].device)) {
60 trace_vfio_quirk_rom_blacklisted(vdev->vbasedev.name,
61 romblacklist[i].vendor,
62 romblacklist[i].device);
63 return true;
66 return false;
70 * Device specific region quirks (mostly backdoors to PCI config space)
74 * The generic window quirks operate on an address and data register,
75 * vfio_generic_window_address_quirk handles the address register and
76 * vfio_generic_window_data_quirk handles the data register. These ops
77 * pass reads and writes through to hardware until a value matching the
78 * stored address match/mask is written. When this occurs, the data
79 * register accesses emulated PCI config space for the device rather than
80 * passing through accesses. This enables devices where PCI config space
81 * is accessible behind a window register to maintain the virtualization
82 * provided through vfio.
/* One address pattern that switches a config window quirk on. */
typedef struct VFIOConfigWindowMatch {
    uint32_t match; /* expected value of the address with the mask bits cleared */
    uint32_t mask;  /* bits of the address kept as the config space offset */
} VFIOConfigWindowMatch;
89 typedef struct VFIOConfigWindowQuirk {
90 struct VFIOPCIDevice *vdev;
92 uint32_t address_val;
94 uint32_t address_offset;
95 uint32_t data_offset;
97 bool window_enabled;
98 uint8_t bar;
100 MemoryRegion *addr_mem;
101 MemoryRegion *data_mem;
103 uint32_t nr_matches;
104 VFIOConfigWindowMatch matches[];
105 } VFIOConfigWindowQuirk;
107 static uint64_t vfio_generic_window_quirk_address_read(void *opaque,
108 hwaddr addr,
109 unsigned size)
111 VFIOConfigWindowQuirk *window = opaque;
112 VFIOPCIDevice *vdev = window->vdev;
114 return vfio_region_read(&vdev->bars[window->bar].region,
115 addr + window->address_offset, size);
118 static void vfio_generic_window_quirk_address_write(void *opaque, hwaddr addr,
119 uint64_t data,
120 unsigned size)
122 VFIOConfigWindowQuirk *window = opaque;
123 VFIOPCIDevice *vdev = window->vdev;
124 int i;
126 window->window_enabled = false;
128 vfio_region_write(&vdev->bars[window->bar].region,
129 addr + window->address_offset, data, size);
131 for (i = 0; i < window->nr_matches; i++) {
132 if ((data & ~window->matches[i].mask) == window->matches[i].match) {
133 window->window_enabled = true;
134 window->address_val = data & window->matches[i].mask;
135 trace_vfio_quirk_generic_window_address_write(vdev->vbasedev.name,
136 memory_region_name(window->addr_mem), data);
137 break;
142 static const MemoryRegionOps vfio_generic_window_address_quirk = {
143 .read = vfio_generic_window_quirk_address_read,
144 .write = vfio_generic_window_quirk_address_write,
145 .endianness = DEVICE_LITTLE_ENDIAN,
148 static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
149 hwaddr addr, unsigned size)
151 VFIOConfigWindowQuirk *window = opaque;
152 VFIOPCIDevice *vdev = window->vdev;
153 uint64_t data;
155 /* Always read data reg, discard if window enabled */
156 data = vfio_region_read(&vdev->bars[window->bar].region,
157 addr + window->data_offset, size);
159 if (window->window_enabled) {
160 data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
161 trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
162 memory_region_name(window->data_mem), data);
165 return data;
168 static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
169 uint64_t data, unsigned size)
171 VFIOConfigWindowQuirk *window = opaque;
172 VFIOPCIDevice *vdev = window->vdev;
174 if (window->window_enabled) {
175 vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
176 trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
177 memory_region_name(window->data_mem), data);
178 return;
181 vfio_region_write(&vdev->bars[window->bar].region,
182 addr + window->data_offset, data, size);
185 static const MemoryRegionOps vfio_generic_window_data_quirk = {
186 .read = vfio_generic_window_quirk_data_read,
187 .write = vfio_generic_window_quirk_data_write,
188 .endianness = DEVICE_LITTLE_ENDIAN,
192 * The generic mirror quirk handles devices which expose PCI config space
193 * through a region within a BAR. When enabled, reads and writes are
194 * redirected through to emulated PCI config space. XXX if PCI config space
195 * used memory regions, this could just be an alias.
197 typedef struct VFIOConfigMirrorQuirk {
198 struct VFIOPCIDevice *vdev;
199 uint32_t offset;
200 uint8_t bar;
201 MemoryRegion *mem;
202 } VFIOConfigMirrorQuirk;
204 static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
205 hwaddr addr, unsigned size)
207 VFIOConfigMirrorQuirk *mirror = opaque;
208 VFIOPCIDevice *vdev = mirror->vdev;
209 uint64_t data;
211 /* Read and discard in case the hardware cares */
212 (void)vfio_region_read(&vdev->bars[mirror->bar].region,
213 addr + mirror->offset, size);
215 data = vfio_pci_read_config(&vdev->pdev, addr, size);
216 trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
217 memory_region_name(mirror->mem),
218 addr, data);
219 return data;
222 static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
223 uint64_t data, unsigned size)
225 VFIOConfigMirrorQuirk *mirror = opaque;
226 VFIOPCIDevice *vdev = mirror->vdev;
228 vfio_pci_write_config(&vdev->pdev, addr, data, size);
229 trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
230 memory_region_name(mirror->mem),
231 addr, data);
234 static const MemoryRegionOps vfio_generic_mirror_quirk = {
235 .read = vfio_generic_quirk_mirror_read,
236 .write = vfio_generic_quirk_mirror_write,
237 .endianness = DEVICE_LITTLE_ENDIAN,
/*
 * Is range1 fully contained within range2?
 *
 * Each range is given as a first offset and a length.  The test is written
 * without computing first + len, so a range that reaches past the top of
 * the 64-bit space cannot wrap around and appear contained.
 *
 * Returns true when [first1, first1 + len1) lies within
 * [first2, first2 + len2).
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /* Both subtractions are safe: first1 >= first2 and len2 >= len1 */
    return first1 - first2 <= len2 - len1;
}
246 #define PCI_VENDOR_ID_ATI 0x1002
249 * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
250 * through VGA register 0x3c3. On newer cards, the I/O port BAR is always
251 * BAR4 (older cards like the X550 used BAR1, but we don't care to support
252 * those). Note that on bare metal, a read of 0x3c3 doesn't always return the
253 * I/O port BAR address. Originally this was coded to return the virtual BAR
254 * address only if the physical register read returns the actual BAR address,
255 * but users have reported greater success if we return the virtual address
256 * unconditionally.
258 static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
259 hwaddr addr, unsigned size)
261 VFIOPCIDevice *vdev = opaque;
262 uint64_t data = vfio_pci_read_config(&vdev->pdev,
263 PCI_BASE_ADDRESS_4 + 1, size);
265 trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
267 return data;
270 static const MemoryRegionOps vfio_ati_3c3_quirk = {
271 .read = vfio_ati_3c3_quirk_read,
272 .endianness = DEVICE_LITTLE_ENDIAN,
275 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice *vdev)
277 VFIOQuirk *quirk;
280 * As long as the BAR is >= 256 bytes it will be aligned such that the
281 * lower byte is always zero. Filter out anything else, if it exists.
283 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
284 !vdev->bars[4].ioport || vdev->bars[4].region.size < 256) {
285 return;
288 quirk = g_malloc0(sizeof(*quirk));
289 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
290 quirk->nr_mem = 1;
292 memory_region_init_io(quirk->mem, OBJECT(vdev), &vfio_ati_3c3_quirk, vdev,
293 "vfio-ati-3c3-quirk", 1);
294 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
295 3 /* offset 3 bytes from 0x3c0 */, quirk->mem);
297 QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
298 quirk, next);
300 trace_vfio_quirk_ati_3c3_probe(vdev->vbasedev.name);
304 * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
305 * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access
306 * the MMIO space directly, but a window to this space is provided through
307 * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the
308 * data register. When the address is programmed to a range of 0x4000-0x4fff
309 * PCI configuration space is available. Experimentation seems to indicate
310 * that read-only may be provided by hardware.
312 static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice *vdev, int nr)
314 VFIOQuirk *quirk;
315 VFIOConfigWindowQuirk *window;
317 /* This windows doesn't seem to be used except by legacy VGA code */
318 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
319 !vdev->has_vga || nr != 4) {
320 return;
323 quirk = g_malloc0(sizeof(*quirk));
324 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
325 quirk->nr_mem = 2;
326 window = quirk->data = g_malloc0(sizeof(*window) +
327 sizeof(VFIOConfigWindowMatch));
328 window->vdev = vdev;
329 window->address_offset = 0;
330 window->data_offset = 4;
331 window->nr_matches = 1;
332 window->matches[0].match = 0x4000;
333 window->matches[0].mask = PCIE_CONFIG_SPACE_SIZE - 1;
334 window->bar = nr;
335 window->addr_mem = &quirk->mem[0];
336 window->data_mem = &quirk->mem[1];
338 memory_region_init_io(window->addr_mem, OBJECT(vdev),
339 &vfio_generic_window_address_quirk, window,
340 "vfio-ati-bar4-window-address-quirk", 4);
341 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
342 window->address_offset,
343 window->addr_mem, 1);
345 memory_region_init_io(window->data_mem, OBJECT(vdev),
346 &vfio_generic_window_data_quirk, window,
347 "vfio-ati-bar4-window-data-quirk", 4);
348 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
349 window->data_offset,
350 window->data_mem, 1);
352 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
354 trace_vfio_quirk_ati_bar4_probe(vdev->vbasedev.name);
358 * Trap the BAR2 MMIO mirror to config space as well.
360 static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice *vdev, int nr)
362 VFIOQuirk *quirk;
363 VFIOConfigMirrorQuirk *mirror;
365 /* Only enable on newer devices where BAR2 is 64bit */
366 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_ATI, PCI_ANY_ID) ||
367 !vdev->has_vga || nr != 2 || !vdev->bars[2].mem64) {
368 return;
371 quirk = g_malloc0(sizeof(*quirk));
372 mirror = quirk->data = g_malloc0(sizeof(*mirror));
373 mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
374 quirk->nr_mem = 1;
375 mirror->vdev = vdev;
376 mirror->offset = 0x4000;
377 mirror->bar = nr;
379 memory_region_init_io(mirror->mem, OBJECT(vdev),
380 &vfio_generic_mirror_quirk, mirror,
381 "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE);
382 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
383 mirror->offset, mirror->mem, 1);
385 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
387 trace_vfio_quirk_ati_bar2_probe(vdev->vbasedev.name);
391 * Older ATI/AMD cards like the X550 have a similar window to that above.
392 * I/O port BAR1 provides a window to a mirror of PCI config space located
393 * in BAR2 at offset 0xf00. We don't care to support such older cards, but
394 * note it for future reference.
397 #define PCI_VENDOR_ID_NVIDIA 0x10de
400 * Nvidia has several different methods to get to config space, the
401 * nouveau project has several of these documented here:
402 * https://github.com/pathscale/envytools/tree/master/hwdocs
404 * The first quirk is actually not documented in envytools and is found
405 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an
406 * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access
407 * the mirror of PCI config space found at BAR0 offset 0x1800. The access
408 * sequence first writes 0x338 to I/O port 0x3d4. The target offset is
409 * then written to 0x3d0. Finally 0x538 is written for a read and 0x738
410 * is written for a write to 0x3d4. The BAR0 offset is then accessible
411 * through 0x3d0. This quirk doesn't seem to be necessary on newer cards
412 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
/* Steps of the 0x3d4/0x3d0 access sequence tracked by the quirk. */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE
} VFIONvidia3d0State;

/* Printable names for tracing, indexed by VFIONvidia3d0State. */
static const char *nv3d0_states[] = {
    [NONE] = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ] = "READ",
    [WRITE] = "WRITE",
};
418 typedef struct VFIONvidia3d0Quirk {
419 VFIOPCIDevice *vdev;
420 VFIONvidia3d0State state;
421 uint32_t offset;
422 } VFIONvidia3d0Quirk;
424 static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque,
425 hwaddr addr, unsigned size)
427 VFIONvidia3d0Quirk *quirk = opaque;
428 VFIOPCIDevice *vdev = quirk->vdev;
430 quirk->state = NONE;
432 return vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
433 addr + 0x14, size);
436 static void vfio_nvidia_3d4_quirk_write(void *opaque, hwaddr addr,
437 uint64_t data, unsigned size)
439 VFIONvidia3d0Quirk *quirk = opaque;
440 VFIOPCIDevice *vdev = quirk->vdev;
441 VFIONvidia3d0State old_state = quirk->state;
443 quirk->state = NONE;
445 switch (data) {
446 case 0x338:
447 if (old_state == NONE) {
448 quirk->state = SELECT;
449 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
450 nv3d0_states[quirk->state]);
452 break;
453 case 0x538:
454 if (old_state == WINDOW) {
455 quirk->state = READ;
456 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
457 nv3d0_states[quirk->state]);
459 break;
460 case 0x738:
461 if (old_state == WINDOW) {
462 quirk->state = WRITE;
463 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
464 nv3d0_states[quirk->state]);
466 break;
469 vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
470 addr + 0x14, data, size);
473 static const MemoryRegionOps vfio_nvidia_3d4_quirk = {
474 .read = vfio_nvidia_3d4_quirk_read,
475 .write = vfio_nvidia_3d4_quirk_write,
476 .endianness = DEVICE_LITTLE_ENDIAN,
479 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
480 hwaddr addr, unsigned size)
482 VFIONvidia3d0Quirk *quirk = opaque;
483 VFIOPCIDevice *vdev = quirk->vdev;
484 VFIONvidia3d0State old_state = quirk->state;
485 uint64_t data = vfio_vga_read(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
486 addr + 0x10, size);
488 quirk->state = NONE;
490 if (old_state == READ &&
491 (quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
492 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
494 data = vfio_pci_read_config(&vdev->pdev, offset, size);
495 trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
496 offset, size, data);
499 return data;
502 static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
503 uint64_t data, unsigned size)
505 VFIONvidia3d0Quirk *quirk = opaque;
506 VFIOPCIDevice *vdev = quirk->vdev;
507 VFIONvidia3d0State old_state = quirk->state;
509 quirk->state = NONE;
511 if (old_state == SELECT) {
512 quirk->offset = (uint32_t)data;
513 quirk->state = WINDOW;
514 trace_vfio_quirk_nvidia_3d0_state(vdev->vbasedev.name,
515 nv3d0_states[quirk->state]);
516 } else if (old_state == WRITE) {
517 if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
518 uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
520 vfio_pci_write_config(&vdev->pdev, offset, data, size);
521 trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
522 offset, data, size);
523 return;
527 vfio_vga_write(&vdev->vga.region[QEMU_PCI_VGA_IO_HI],
528 addr + 0x10, data, size);
531 static const MemoryRegionOps vfio_nvidia_3d0_quirk = {
532 .read = vfio_nvidia_3d0_quirk_read,
533 .write = vfio_nvidia_3d0_quirk_write,
534 .endianness = DEVICE_LITTLE_ENDIAN,
537 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice *vdev)
539 VFIOQuirk *quirk;
540 VFIONvidia3d0Quirk *data;
542 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
543 !vdev->bars[1].region.size) {
544 return;
547 quirk = g_malloc0(sizeof(*quirk));
548 quirk->data = data = g_malloc0(sizeof(*data));
549 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
550 quirk->nr_mem = 2;
551 data->vdev = vdev;
553 memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_nvidia_3d4_quirk,
554 data, "vfio-nvidia-3d4-quirk", 2);
555 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
556 0x14 /* 0x3c0 + 0x14 */, &quirk->mem[0]);
558 memory_region_init_io(&quirk->mem[1], OBJECT(vdev), &vfio_nvidia_3d0_quirk,
559 data, "vfio-nvidia-3d0-quirk", 2);
560 memory_region_add_subregion(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].mem,
561 0x10 /* 0x3c0 + 0x10 */, &quirk->mem[1]);
563 QLIST_INSERT_HEAD(&vdev->vga.region[QEMU_PCI_VGA_IO_HI].quirks,
564 quirk, next);
566 trace_vfio_quirk_nvidia_3d0_probe(vdev->vbasedev.name);
570 * The second quirk is documented in envytools. The I/O port BAR5 is just
571 * a set of address/data ports to the MMIO BARs. The BAR we care about is
572 * again BAR0. This backdoor is apparently a bit newer than the one above
573 * so we need to trap not only the 256 bytes @0x1800, but also all of PCI
574 * config space, including extended space, available in the 4k @0x88000.
576 typedef struct VFIONvidiaBAR5Quirk {
577 uint32_t master;
578 uint32_t enable;
579 MemoryRegion *addr_mem;
580 MemoryRegion *data_mem;
581 bool enabled;
582 VFIOConfigWindowQuirk window; /* last for match data */
583 } VFIONvidiaBAR5Quirk;
585 static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk *bar5)
587 VFIOPCIDevice *vdev = bar5->window.vdev;
589 if (((bar5->master & bar5->enable) & 0x1) == bar5->enabled) {
590 return;
593 bar5->enabled = !bar5->enabled;
594 trace_vfio_quirk_nvidia_bar5_state(vdev->vbasedev.name,
595 bar5->enabled ? "Enable" : "Disable");
596 memory_region_set_enabled(bar5->addr_mem, bar5->enabled);
597 memory_region_set_enabled(bar5->data_mem, bar5->enabled);
600 static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque,
601 hwaddr addr, unsigned size)
603 VFIONvidiaBAR5Quirk *bar5 = opaque;
604 VFIOPCIDevice *vdev = bar5->window.vdev;
606 return vfio_region_read(&vdev->bars[5].region, addr, size);
609 static void vfio_nvidia_bar5_quirk_master_write(void *opaque, hwaddr addr,
610 uint64_t data, unsigned size)
612 VFIONvidiaBAR5Quirk *bar5 = opaque;
613 VFIOPCIDevice *vdev = bar5->window.vdev;
615 vfio_region_write(&vdev->bars[5].region, addr, data, size);
617 bar5->master = data;
618 vfio_nvidia_bar5_enable(bar5);
621 static const MemoryRegionOps vfio_nvidia_bar5_quirk_master = {
622 .read = vfio_nvidia_bar5_quirk_master_read,
623 .write = vfio_nvidia_bar5_quirk_master_write,
624 .endianness = DEVICE_LITTLE_ENDIAN,
627 static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque,
628 hwaddr addr, unsigned size)
630 VFIONvidiaBAR5Quirk *bar5 = opaque;
631 VFIOPCIDevice *vdev = bar5->window.vdev;
633 return vfio_region_read(&vdev->bars[5].region, addr + 4, size);
636 static void vfio_nvidia_bar5_quirk_enable_write(void *opaque, hwaddr addr,
637 uint64_t data, unsigned size)
639 VFIONvidiaBAR5Quirk *bar5 = opaque;
640 VFIOPCIDevice *vdev = bar5->window.vdev;
642 vfio_region_write(&vdev->bars[5].region, addr + 4, data, size);
644 bar5->enable = data;
645 vfio_nvidia_bar5_enable(bar5);
648 static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable = {
649 .read = vfio_nvidia_bar5_quirk_enable_read,
650 .write = vfio_nvidia_bar5_quirk_enable_write,
651 .endianness = DEVICE_LITTLE_ENDIAN,
654 static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice *vdev, int nr)
656 VFIOQuirk *quirk;
657 VFIONvidiaBAR5Quirk *bar5;
658 VFIOConfigWindowQuirk *window;
660 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
661 !vdev->has_vga || nr != 5) {
662 return;
665 quirk = g_malloc0(sizeof(*quirk));
666 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 4);
667 quirk->nr_mem = 4;
668 bar5 = quirk->data = g_malloc0(sizeof(*bar5) +
669 (sizeof(VFIOConfigWindowMatch) * 2));
670 window = &bar5->window;
672 window->vdev = vdev;
673 window->address_offset = 0x8;
674 window->data_offset = 0xc;
675 window->nr_matches = 2;
676 window->matches[0].match = 0x1800;
677 window->matches[0].mask = PCI_CONFIG_SPACE_SIZE - 1;
678 window->matches[1].match = 0x88000;
679 window->matches[1].mask = PCIE_CONFIG_SPACE_SIZE - 1;
680 window->bar = nr;
681 window->addr_mem = bar5->addr_mem = &quirk->mem[0];
682 window->data_mem = bar5->data_mem = &quirk->mem[1];
684 memory_region_init_io(window->addr_mem, OBJECT(vdev),
685 &vfio_generic_window_address_quirk, window,
686 "vfio-nvidia-bar5-window-address-quirk", 4);
687 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
688 window->address_offset,
689 window->addr_mem, 1);
690 memory_region_set_enabled(window->addr_mem, false);
692 memory_region_init_io(window->data_mem, OBJECT(vdev),
693 &vfio_generic_window_data_quirk, window,
694 "vfio-nvidia-bar5-window-data-quirk", 4);
695 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
696 window->data_offset,
697 window->data_mem, 1);
698 memory_region_set_enabled(window->data_mem, false);
700 memory_region_init_io(&quirk->mem[2], OBJECT(vdev),
701 &vfio_nvidia_bar5_quirk_master, bar5,
702 "vfio-nvidia-bar5-master-quirk", 4);
703 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
704 0, &quirk->mem[2], 1);
706 memory_region_init_io(&quirk->mem[3], OBJECT(vdev),
707 &vfio_nvidia_bar5_quirk_enable, bar5,
708 "vfio-nvidia-bar5-enable-quirk", 4);
709 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
710 4, &quirk->mem[3], 1);
712 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
714 trace_vfio_quirk_nvidia_bar5_probe(vdev->vbasedev.name);
718 * Finally, BAR0 itself. We want to redirect any accesses to either
719 * 0x1800 or 0x88000 through the PCI config space access functions.
721 static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
722 uint64_t data, unsigned size)
724 VFIOConfigMirrorQuirk *mirror = opaque;
725 VFIOPCIDevice *vdev = mirror->vdev;
726 PCIDevice *pdev = &vdev->pdev;
728 vfio_generic_quirk_mirror_write(opaque, addr, data, size);
731 * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
732 * MSI capability ID register. Both the ID and next register are
733 * read-only, so we allow writes covering either of those to real hw.
735 if ((pdev->cap_present & QEMU_PCI_CAP_MSI) &&
736 vfio_range_contained(addr, size, pdev->msi_cap, PCI_MSI_FLAGS)) {
737 vfio_region_write(&vdev->bars[mirror->bar].region,
738 addr + mirror->offset, data, size);
739 trace_vfio_quirk_nvidia_bar0_msi_ack(vdev->vbasedev.name);
743 static const MemoryRegionOps vfio_nvidia_mirror_quirk = {
744 .read = vfio_generic_quirk_mirror_read,
745 .write = vfio_nvidia_quirk_mirror_write,
746 .endianness = DEVICE_LITTLE_ENDIAN,
749 static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice *vdev, int nr)
751 VFIOQuirk *quirk;
752 VFIOConfigMirrorQuirk *mirror;
754 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID) ||
755 !vfio_is_vga(vdev) || nr != 0) {
756 return;
759 quirk = g_malloc0(sizeof(*quirk));
760 mirror = quirk->data = g_malloc0(sizeof(*mirror));
761 mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
762 quirk->nr_mem = 1;
763 mirror->vdev = vdev;
764 mirror->offset = 0x88000;
765 mirror->bar = nr;
767 memory_region_init_io(mirror->mem, OBJECT(vdev),
768 &vfio_nvidia_mirror_quirk, mirror,
769 "vfio-nvidia-bar0-88000-mirror-quirk",
770 PCIE_CONFIG_SPACE_SIZE);
771 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
772 mirror->offset, mirror->mem, 1);
774 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
776 /* The 0x1800 offset mirror only seems to get used by legacy VGA */
777 if (vdev->has_vga) {
778 quirk = g_malloc0(sizeof(*quirk));
779 mirror = quirk->data = g_malloc0(sizeof(*mirror));
780 mirror->mem = quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 1);
781 quirk->nr_mem = 1;
782 mirror->vdev = vdev;
783 mirror->offset = 0x1800;
784 mirror->bar = nr;
786 memory_region_init_io(mirror->mem, OBJECT(vdev),
787 &vfio_nvidia_mirror_quirk, mirror,
788 "vfio-nvidia-bar0-1800-mirror-quirk",
789 PCI_CONFIG_SPACE_SIZE);
790 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
791 mirror->offset, mirror->mem, 1);
793 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
796 trace_vfio_quirk_nvidia_bar0_probe(vdev->vbasedev.name);
800 * TODO - Some Nvidia devices provide config access to their companion HDA
801 * device and even to their parent bridge via these config space mirrors.
802 * Add quirks for those regions.
805 #define PCI_VENDOR_ID_REALTEK 0x10ec
808 * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2
809 * offset 0x70 there is a dword data register, offset 0x74 is a dword address
810 * register. According to the Linux r8169 driver, the MSI-X table is addressed
811 * when the "type" portion of the address register is set to 0x1. This appears
812 * to be bits 16:30. Bit 31 is both a write indicator and some sort of
813 * "address latched" indicator. Bits 12:15 are a mask field, which we can
814 * ignore because the MSI-X table should always be accessed as a dword (full
815 * mask). Bits 0:11 is offset within the type.
817 * Example trace:
819 * Read from MSI-X table offset 0
820 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
821 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
822 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
824 * Write 0xfee00000 to MSI-X table offset 0
825 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
826 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
827 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
829 typedef struct VFIOrtl8168Quirk {
830 VFIOPCIDevice *vdev;
831 uint32_t addr;
832 uint32_t data;
833 bool enabled;
834 } VFIOrtl8168Quirk;
836 static uint64_t vfio_rtl8168_quirk_address_read(void *opaque,
837 hwaddr addr, unsigned size)
839 VFIOrtl8168Quirk *rtl = opaque;
840 VFIOPCIDevice *vdev = rtl->vdev;
841 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
843 if (rtl->enabled) {
844 data = rtl->addr ^ 0x80000000U; /* latch/complete */
845 trace_vfio_quirk_rtl8168_fake_latch(vdev->vbasedev.name, data);
848 return data;
851 static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
852 uint64_t data, unsigned size)
854 VFIOrtl8168Quirk *rtl = opaque;
855 VFIOPCIDevice *vdev = rtl->vdev;
857 rtl->enabled = false;
859 if ((data & 0x7fff0000) == 0x10000) { /* MSI-X table */
860 rtl->enabled = true;
861 rtl->addr = (uint32_t)data;
863 if (data & 0x80000000U) { /* Do write */
864 if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
865 hwaddr offset = data & 0xfff;
866 uint64_t val = rtl->data;
868 trace_vfio_quirk_rtl8168_msix_write(vdev->vbasedev.name,
869 (uint16_t)offset, val);
871 /* Write to the proper guest MSI-X table instead */
872 memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
873 offset, val, size,
874 MEMTXATTRS_UNSPECIFIED);
876 return; /* Do not write guest MSI-X data to hardware */
880 vfio_region_write(&vdev->bars[2].region, addr + 0x74, data, size);
883 static const MemoryRegionOps vfio_rtl_address_quirk = {
884 .read = vfio_rtl8168_quirk_address_read,
885 .write = vfio_rtl8168_quirk_address_write,
886 .valid = {
887 .min_access_size = 4,
888 .max_access_size = 4,
889 .unaligned = false,
891 .endianness = DEVICE_LITTLE_ENDIAN,
894 static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
895 hwaddr addr, unsigned size)
897 VFIOrtl8168Quirk *rtl = opaque;
898 VFIOPCIDevice *vdev = rtl->vdev;
899 uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size);
901 if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
902 hwaddr offset = rtl->addr & 0xfff;
903 memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
904 &data, size, MEMTXATTRS_UNSPECIFIED);
905 trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
908 return data;
911 static void vfio_rtl8168_quirk_data_write(void *opaque, hwaddr addr,
912 uint64_t data, unsigned size)
914 VFIOrtl8168Quirk *rtl = opaque;
915 VFIOPCIDevice *vdev = rtl->vdev;
917 rtl->data = (uint32_t)data;
919 vfio_region_write(&vdev->bars[2].region, addr + 0x70, data, size);
922 static const MemoryRegionOps vfio_rtl_data_quirk = {
923 .read = vfio_rtl8168_quirk_data_read,
924 .write = vfio_rtl8168_quirk_data_write,
925 .valid = {
926 .min_access_size = 4,
927 .max_access_size = 4,
928 .unaligned = false,
930 .endianness = DEVICE_LITTLE_ENDIAN,
933 static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
935 VFIOQuirk *quirk;
936 VFIOrtl8168Quirk *rtl;
938 if (!vfio_pci_is(vdev, PCI_VENDOR_ID_REALTEK, 0x8168) || nr != 2) {
939 return;
942 quirk = g_malloc0(sizeof(*quirk));
943 quirk->mem = g_malloc0_n(sizeof(MemoryRegion), 2);
944 quirk->nr_mem = 2;
945 quirk->data = rtl = g_malloc0(sizeof(*rtl));
946 rtl->vdev = vdev;
948 memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
949 &vfio_rtl_address_quirk, rtl,
950 "vfio-rtl8168-window-address-quirk", 4);
951 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
952 0x74, &quirk->mem[0], 1);
954 memory_region_init_io(&quirk->mem[1], OBJECT(vdev),
955 &vfio_rtl_data_quirk, rtl,
956 "vfio-rtl8168-window-data-quirk", 4);
957 memory_region_add_subregion_overlap(&vdev->bars[nr].region.mem,
958 0x70, &quirk->mem[1], 1);
960 QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
962 trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
966 * Common quirk probe entry points.
968 void vfio_vga_quirk_setup(VFIOPCIDevice *vdev)
970 vfio_vga_probe_ati_3c3_quirk(vdev);
971 vfio_vga_probe_nvidia_3d0_quirk(vdev);
974 void vfio_vga_quirk_teardown(VFIOPCIDevice *vdev)
976 VFIOQuirk *quirk;
977 int i, j;
979 for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
980 QLIST_FOREACH(quirk, &vdev->vga.region[i].quirks, next) {
981 for (j = 0; j < quirk->nr_mem; j++) {
982 memory_region_del_subregion(&vdev->vga.region[i].mem,
983 &quirk->mem[j]);
989 void vfio_vga_quirk_free(VFIOPCIDevice *vdev)
991 int i, j;
993 for (i = 0; i < ARRAY_SIZE(vdev->vga.region); i++) {
994 while (!QLIST_EMPTY(&vdev->vga.region[i].quirks)) {
995 VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga.region[i].quirks);
996 QLIST_REMOVE(quirk, next);
997 for (j = 0; j < quirk->nr_mem; j++) {
998 object_unparent(OBJECT(&quirk->mem[j]));
1000 g_free(quirk->mem);
1001 g_free(quirk->data);
1002 g_free(quirk);
1007 void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
1009 vfio_probe_ati_bar4_quirk(vdev, nr);
1010 vfio_probe_ati_bar2_quirk(vdev, nr);
1011 vfio_probe_nvidia_bar5_quirk(vdev, nr);
1012 vfio_probe_nvidia_bar0_quirk(vdev, nr);
1013 vfio_probe_rtl8168_bar2_quirk(vdev, nr);
1016 void vfio_bar_quirk_teardown(VFIOPCIDevice *vdev, int nr)
1018 VFIOBAR *bar = &vdev->bars[nr];
1019 VFIOQuirk *quirk;
1020 int i;
1022 QLIST_FOREACH(quirk, &bar->quirks, next) {
1023 for (i = 0; i < quirk->nr_mem; i++) {
1024 memory_region_del_subregion(&bar->region.mem, &quirk->mem[i]);
1029 void vfio_bar_quirk_free(VFIOPCIDevice *vdev, int nr)
1031 VFIOBAR *bar = &vdev->bars[nr];
1032 int i;
1034 while (!QLIST_EMPTY(&bar->quirks)) {
1035 VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
1036 QLIST_REMOVE(quirk, next);
1037 for (i = 0; i < quirk->nr_mem; i++) {
1038 object_unparent(OBJECT(&quirk->mem[i]));
1040 g_free(quirk->mem);
1041 g_free(quirk->data);
1042 g_free(quirk);
1047 * Reset quirks
1051 * AMD Radeon PCI config reset, based on Linux:
1052 * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1053 * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1054 * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1055 * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1056 * IDs: include/drm/drm_pciids.h
1057 * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1059 * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the
1060 * hardware that should be fixed on future ASICs. The symptom of this is that
1061 * once the accelerated driver loads, Windows guests will bsod on subsequent
1062 * attempts to load the driver, such as after VM reset or shutdown/restart. To
1063 * work around this, we do an AMD specific PCI config reset, followed by an SMC
1064 * reset. The PCI config reset only works if SMC firmware is running, so we
1065 * have a dependency on the state of the device as to whether this reset will
1066 * be effective. There are still cases where we won't be able to kick the
1067 * device into working, but this greatly improves the usability overall. The
1068 * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1069 * poking is largely ASIC specific.
1071 static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
1073 uint32_t clk, pc_c;
1076 * Registers 200h and 204h are index and data registers for accessing
1077 * indirect configuration registers within the device.
1079 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1080 clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1081 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
1082 pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1084 return (!(clk & 1) && (0x20100 <= pc_c));
1088 * The scope of a config reset is controlled by a mode bit in the misc register
1089 * and a fuse, exposed as a bit in another register. The fuse is the default
1090 * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
1091 * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1092 * the fuse. A truth table therefore tells us that if misc == fuse, we need
1093 * to flip the value of the bit in the misc register.
1095 static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
1097 uint32_t misc, fuse;
1098 bool a, b;
1100 vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
1101 fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1102 b = fuse & 64;
1104 vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
1105 misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1106 a = misc & 2;
1108 if (a == b) {
1109 vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
1110 vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
1114 static int vfio_radeon_reset(VFIOPCIDevice *vdev)
1116 PCIDevice *pdev = &vdev->pdev;
1117 int i, ret = 0;
1118 uint32_t data;
1120 /* Defer to a kernel implemented reset */
1121 if (vdev->vbasedev.reset_works) {
1122 trace_vfio_quirk_ati_bonaire_reset_skipped(vdev->vbasedev.name);
1123 return -ENODEV;
1126 /* Enable only memory BAR access */
1127 vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
1129 /* Reset only works if SMC firmware is loaded and running */
1130 if (!vfio_radeon_smc_is_running(vdev)) {
1131 ret = -EINVAL;
1132 trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev->vbasedev.name);
1133 goto out;
1136 /* Make sure only the GFX function is reset */
1137 vfio_radeon_set_gfx_only_reset(vdev);
1139 /* AMD PCI config reset */
1140 vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
1141 usleep(100);
1143 /* Read back the memory size to make sure we're out of reset */
1144 for (i = 0; i < 100000; i++) {
1145 if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
1146 goto reset_smc;
1148 usleep(1);
1151 trace_vfio_quirk_ati_bonaire_reset_timeout(vdev->vbasedev.name);
1153 reset_smc:
1154 /* Reset SMC */
1155 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
1156 data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1157 data |= 1;
1158 vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1160 /* Disable SMC clock */
1161 vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
1162 data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
1163 data |= 1;
1164 vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
1166 trace_vfio_quirk_ati_bonaire_reset_done(vdev->vbasedev.name);
1168 out:
1169 /* Restore PCI command register */
1170 vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
1172 return ret;
1175 void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev)
1177 switch (vdev->vendor_id) {
1178 case 0x1002:
1179 switch (vdev->device_id) {
1180 /* Bonaire */
1181 case 0x6649: /* Bonaire [FirePro W5100] */
1182 case 0x6650:
1183 case 0x6651:
1184 case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
1185 case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
1186 case 0x665d: /* Bonaire [Radeon R7 200 Series] */
1187 /* Hawaii */
1188 case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
1189 case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
1190 case 0x67A2:
1191 case 0x67A8:
1192 case 0x67A9:
1193 case 0x67AA:
1194 case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
1195 case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
1196 case 0x67B8:
1197 case 0x67B9:
1198 case 0x67BA:
1199 case 0x67BE:
1200 vdev->resetfn = vfio_radeon_reset;
1201 trace_vfio_quirk_ati_bonaire_reset(vdev->vbasedev.name);
1202 break;
1204 break;