2 * device quirks for PCI devices
4 * Copyright Red Hat, Inc. 2012-2015
7 * Alex Williamson <alex.williamson@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
15 #include "qemu/range.h"
17 #define PCI_ANY_ID (~0)
19 /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
20 static bool vfio_pci_is(VFIOPCIDevice
*vdev
, uint32_t vendor
, uint32_t device
)
22 return (vendor
== PCI_ANY_ID
|| vendor
== vdev
->vendor_id
) &&
23 (device
== PCI_ANY_ID
|| device
== vdev
->device_id
);
26 static bool vfio_is_vga(VFIOPCIDevice
*vdev
)
28 PCIDevice
*pdev
= &vdev
->pdev
;
29 uint16_t class = pci_get_word(pdev
->config
+ PCI_CLASS_DEVICE
);
31 return class == PCI_CLASS_DISPLAY_VGA
;
35 * List of device ids/vendor ids for which to disable
36 * option rom loading. This avoids the guest hangs during rom
37 * execution as noticed with the BCM 57810 card for lack of a
38 * better way to handle such issues.
39 * The user can still override by specifying a romfile or
41 * Please see https://bugs.launchpad.net/qemu/+bug/1284874
42 * for an analysis of the 57810 card hang. When adding
43 * a new vendor id/device id combination below, please also add
44 * your card/environment details and information that could
45 * help in debugging to the bug tracking this issue
51 { 0x14e4, 0x168e }, /* Broadcom BCM 57810 */
54 bool vfio_blacklist_opt_rom(VFIOPCIDevice
*vdev
)
58 for (i
= 0 ; i
< ARRAY_SIZE(romblacklist
); i
++) {
59 if (vfio_pci_is(vdev
, romblacklist
[i
].vendor
, romblacklist
[i
].device
)) {
60 trace_vfio_quirk_rom_blacklisted(vdev
->vbasedev
.name
,
61 romblacklist
[i
].vendor
,
62 romblacklist
[i
].device
);
70 * Device specific region quirks (mostly backdoors to PCI config space)
74 * The generic window quirks operate on an address and data register,
75 * vfio_generic_window_address_quirk handles the address register and
76 * vfio_generic_window_data_quirk handles the data register. These ops
77 * pass reads and writes through to hardware until a value matching the
78 * stored address match/mask is written. When this occurs, the data
79 * register accesses emulated PCI config space for the device rather than
80 * passing through accesses. This enables devices where PCI config space
81 * is accessible behind a window register to maintain the virtualization
82 * provided through vfio.
84 typedef struct VFIOConfigWindowMatch
{
87 } VFIOConfigWindowMatch
;
89 typedef struct VFIOConfigWindowQuirk
{
90 struct VFIOPCIDevice
*vdev
;
94 uint32_t address_offset
;
100 MemoryRegion
*addr_mem
;
101 MemoryRegion
*data_mem
;
104 VFIOConfigWindowMatch matches
[];
105 } VFIOConfigWindowQuirk
;
107 static uint64_t vfio_generic_window_quirk_address_read(void *opaque
,
111 VFIOConfigWindowQuirk
*window
= opaque
;
112 VFIOPCIDevice
*vdev
= window
->vdev
;
114 return vfio_region_read(&vdev
->bars
[window
->bar
].region
,
115 addr
+ window
->address_offset
, size
);
118 static void vfio_generic_window_quirk_address_write(void *opaque
, hwaddr addr
,
122 VFIOConfigWindowQuirk
*window
= opaque
;
123 VFIOPCIDevice
*vdev
= window
->vdev
;
126 window
->window_enabled
= false;
128 vfio_region_write(&vdev
->bars
[window
->bar
].region
,
129 addr
+ window
->address_offset
, data
, size
);
131 for (i
= 0; i
< window
->nr_matches
; i
++) {
132 if ((data
& ~window
->matches
[i
].mask
) == window
->matches
[i
].match
) {
133 window
->window_enabled
= true;
134 window
->address_val
= data
& window
->matches
[i
].mask
;
135 trace_vfio_quirk_generic_window_address_write(vdev
->vbasedev
.name
,
136 memory_region_name(window
->addr_mem
), data
);
142 static const MemoryRegionOps vfio_generic_window_address_quirk
= {
143 .read
= vfio_generic_window_quirk_address_read
,
144 .write
= vfio_generic_window_quirk_address_write
,
145 .endianness
= DEVICE_LITTLE_ENDIAN
,
148 static uint64_t vfio_generic_window_quirk_data_read(void *opaque
,
149 hwaddr addr
, unsigned size
)
151 VFIOConfigWindowQuirk
*window
= opaque
;
152 VFIOPCIDevice
*vdev
= window
->vdev
;
155 /* Always read data reg, discard if window enabled */
156 data
= vfio_region_read(&vdev
->bars
[window
->bar
].region
,
157 addr
+ window
->data_offset
, size
);
159 if (window
->window_enabled
) {
160 data
= vfio_pci_read_config(&vdev
->pdev
, window
->address_val
, size
);
161 trace_vfio_quirk_generic_window_data_read(vdev
->vbasedev
.name
,
162 memory_region_name(window
->data_mem
), data
);
168 static void vfio_generic_window_quirk_data_write(void *opaque
, hwaddr addr
,
169 uint64_t data
, unsigned size
)
171 VFIOConfigWindowQuirk
*window
= opaque
;
172 VFIOPCIDevice
*vdev
= window
->vdev
;
174 if (window
->window_enabled
) {
175 vfio_pci_write_config(&vdev
->pdev
, window
->address_val
, data
, size
);
176 trace_vfio_quirk_generic_window_data_write(vdev
->vbasedev
.name
,
177 memory_region_name(window
->data_mem
), data
);
181 vfio_region_write(&vdev
->bars
[window
->bar
].region
,
182 addr
+ window
->data_offset
, data
, size
);
185 static const MemoryRegionOps vfio_generic_window_data_quirk
= {
186 .read
= vfio_generic_window_quirk_data_read
,
187 .write
= vfio_generic_window_quirk_data_write
,
188 .endianness
= DEVICE_LITTLE_ENDIAN
,
192 * The generic mirror quirk handles devices which expose PCI config space
193 * through a region within a BAR. When enabled, reads and writes are
194 * redirected through to emulated PCI config space. XXX if PCI config space
195 * used memory regions, this could just be an alias.
197 typedef struct VFIOConfigMirrorQuirk
{
198 struct VFIOPCIDevice
*vdev
;
202 } VFIOConfigMirrorQuirk
;
204 static uint64_t vfio_generic_quirk_mirror_read(void *opaque
,
205 hwaddr addr
, unsigned size
)
207 VFIOConfigMirrorQuirk
*mirror
= opaque
;
208 VFIOPCIDevice
*vdev
= mirror
->vdev
;
211 /* Read and discard in case the hardware cares */
212 (void)vfio_region_read(&vdev
->bars
[mirror
->bar
].region
,
213 addr
+ mirror
->offset
, size
);
215 data
= vfio_pci_read_config(&vdev
->pdev
, addr
, size
);
216 trace_vfio_quirk_generic_mirror_read(vdev
->vbasedev
.name
,
217 memory_region_name(mirror
->mem
),
222 static void vfio_generic_quirk_mirror_write(void *opaque
, hwaddr addr
,
223 uint64_t data
, unsigned size
)
225 VFIOConfigMirrorQuirk
*mirror
= opaque
;
226 VFIOPCIDevice
*vdev
= mirror
->vdev
;
228 vfio_pci_write_config(&vdev
->pdev
, addr
, data
, size
);
229 trace_vfio_quirk_generic_mirror_write(vdev
->vbasedev
.name
,
230 memory_region_name(mirror
->mem
),
234 static const MemoryRegionOps vfio_generic_mirror_quirk
= {
235 .read
= vfio_generic_quirk_mirror_read
,
236 .write
= vfio_generic_quirk_mirror_write
,
237 .endianness
= DEVICE_LITTLE_ENDIAN
,
/*
 * Is range1 fully contained within range2?
 *
 * Each range is given as a first address and a length.  The comparison is
 * done on differences rather than on first + len sums so that a range whose
 * end would wrap past UINT64_MAX is not mistaken for a small one.
 *
 * Returns true when first1 >= first2 and range1 ends no later than range2.
 */
static bool vfio_range_contained(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    /* range1 must not start before range2 nor be longer than it. */
    if (first1 < first2 || len1 > len2) {
        return false;
    }

    /*
     * Both subtractions are safe after the check above; this is
     * first1 + len1 <= first2 + len2 rearranged to avoid overflow.
     */
    return first1 - first2 <= len2 - len1;
}
246 #define PCI_VENDOR_ID_ATI 0x1002
249 * Radeon HD cards (HD5450 & HD7850) report the upper byte of the I/O port BAR
250 * through VGA register 0x3c3. On newer cards, the I/O port BAR is always
251 * BAR4 (older cards like the X550 used BAR1, but we don't care to support
252 * those). Note that on bare metal, a read of 0x3c3 doesn't always return the
253 * I/O port BAR address. Originally this was coded to return the virtual BAR
254 * address only if the physical register read returns the actual BAR address,
255 * but users have reported greater success if we return the virtual address
258 static uint64_t vfio_ati_3c3_quirk_read(void *opaque
,
259 hwaddr addr
, unsigned size
)
261 VFIOPCIDevice
*vdev
= opaque
;
262 uint64_t data
= vfio_pci_read_config(&vdev
->pdev
,
263 PCI_BASE_ADDRESS_4
+ 1, size
);
265 trace_vfio_quirk_ati_3c3_read(vdev
->vbasedev
.name
, data
);
270 static const MemoryRegionOps vfio_ati_3c3_quirk
= {
271 .read
= vfio_ati_3c3_quirk_read
,
272 .endianness
= DEVICE_LITTLE_ENDIAN
,
275 static void vfio_vga_probe_ati_3c3_quirk(VFIOPCIDevice
*vdev
)
280 * As long as the BAR is >= 256 bytes it will be aligned such that the
281 * lower byte is always zero. Filter out anything else, if it exists.
283 if (!vfio_pci_is(vdev
, PCI_VENDOR_ID_ATI
, PCI_ANY_ID
) ||
284 !vdev
->bars
[4].ioport
|| vdev
->bars
[4].region
.size
< 256) {
288 quirk
= g_malloc0(sizeof(*quirk
));
289 quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 1);
292 memory_region_init_io(quirk
->mem
, OBJECT(vdev
), &vfio_ati_3c3_quirk
, vdev
,
293 "vfio-ati-3c3-quirk", 1);
294 memory_region_add_subregion(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
].mem
,
295 3 /* offset 3 bytes from 0x3c0 */, quirk
->mem
);
297 QLIST_INSERT_HEAD(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
].quirks
,
300 trace_vfio_quirk_ati_3c3_probe(vdev
->vbasedev
.name
);
304 * Newer ATI/AMD devices, including HD5450 and HD7850, have a mirror to PCI
305 * config space through MMIO BAR2 at offset 0x4000. Nothing seems to access
306 * the MMIO space directly, but a window to this space is provided through
307 * I/O port BAR4. Offset 0x0 is the address register and offset 0x4 is the
308 * data register. When the address is programmed to a range of 0x4000-0x4fff
309 * PCI configuration space is available. Experimentation seems to indicate
310 * that read-only may be provided by hardware.
312 static void vfio_probe_ati_bar4_quirk(VFIOPCIDevice
*vdev
, int nr
)
315 VFIOConfigWindowQuirk
*window
;
317 /* This window doesn't seem to be used except by legacy VGA code */
318 if (!vfio_pci_is(vdev
, PCI_VENDOR_ID_ATI
, PCI_ANY_ID
) ||
319 !vdev
->has_vga
|| nr
!= 4) {
323 quirk
= g_malloc0(sizeof(*quirk
));
324 quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 2);
326 window
= quirk
->data
= g_malloc0(sizeof(*window
) +
327 sizeof(VFIOConfigWindowMatch
));
329 window
->address_offset
= 0;
330 window
->data_offset
= 4;
331 window
->nr_matches
= 1;
332 window
->matches
[0].match
= 0x4000;
333 window
->matches
[0].mask
= PCIE_CONFIG_SPACE_SIZE
- 1;
335 window
->addr_mem
= &quirk
->mem
[0];
336 window
->data_mem
= &quirk
->mem
[1];
338 memory_region_init_io(window
->addr_mem
, OBJECT(vdev
),
339 &vfio_generic_window_address_quirk
, window
,
340 "vfio-ati-bar4-window-address-quirk", 4);
341 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
342 window
->address_offset
,
343 window
->addr_mem
, 1);
345 memory_region_init_io(window
->data_mem
, OBJECT(vdev
),
346 &vfio_generic_window_data_quirk
, window
,
347 "vfio-ati-bar4-window-data-quirk", 4);
348 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
350 window
->data_mem
, 1);
352 QLIST_INSERT_HEAD(&vdev
->bars
[nr
].quirks
, quirk
, next
);
354 trace_vfio_quirk_ati_bar4_probe(vdev
->vbasedev
.name
);
358 * Trap the BAR2 MMIO mirror to config space as well.
360 static void vfio_probe_ati_bar2_quirk(VFIOPCIDevice
*vdev
, int nr
)
363 VFIOConfigMirrorQuirk
*mirror
;
365 /* Only enable on newer devices where BAR2 is 64bit */
366 if (!vfio_pci_is(vdev
, PCI_VENDOR_ID_ATI
, PCI_ANY_ID
) ||
367 !vdev
->has_vga
|| nr
!= 2 || !vdev
->bars
[2].mem64
) {
371 quirk
= g_malloc0(sizeof(*quirk
));
372 mirror
= quirk
->data
= g_malloc0(sizeof(*mirror
));
373 mirror
->mem
= quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 1);
376 mirror
->offset
= 0x4000;
379 memory_region_init_io(mirror
->mem
, OBJECT(vdev
),
380 &vfio_generic_mirror_quirk
, mirror
,
381 "vfio-ati-bar2-4000-quirk", PCI_CONFIG_SPACE_SIZE
);
382 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
383 mirror
->offset
, mirror
->mem
, 1);
385 QLIST_INSERT_HEAD(&vdev
->bars
[nr
].quirks
, quirk
, next
);
387 trace_vfio_quirk_ati_bar2_probe(vdev
->vbasedev
.name
);
391 * Older ATI/AMD cards like the X550 have a similar window to that above.
392 * I/O port BAR1 provides a window to a mirror of PCI config space located
393 * in BAR2 at offset 0xf00. We don't care to support such older cards, but
394 * note it for future reference.
397 #define PCI_VENDOR_ID_NVIDIA 0x10de
400 * Nvidia has several different methods to get to config space, the
401 * nouveau project has several of these documented here:
402 * https://github.com/pathscale/envytools/tree/master/hwdocs
404 * The first quirk is actually not documented in envytools and is found
405 * on 10de:01d1 (NVIDIA Corporation G72 [GeForce 7300 LE]). This is an
406 * NV46 chipset. The backdoor uses the legacy VGA I/O ports to access
407 * the mirror of PCI config space found at BAR0 offset 0x1800. The access
408 * sequence first writes 0x338 to I/O port 0x3d4. The target offset is
409 * then written to 0x3d0. Finally 0x538 is written for a read and 0x738
410 * is written for a write to 0x3d4. The BAR0 offset is then accessible
411 * through 0x3d0. This quirk doesn't seem to be necessary on newer cards
412 * that use the I/O port BAR5 window but it doesn't hurt to leave it.
/* States of the Nvidia 0x3d0/0x3d4 backdoor access sequence. */
typedef enum {
    NONE = 0,
    SELECT,
    WINDOW,
    READ,
    WRITE,
} VFIONvidia3d0State;

/*
 * Printable names for VFIONvidia3d0State, used when tracing state changes.
 * Each entry is keyed by its enumerator so the table cannot silently fall
 * out of step with the enum if states are added or reordered.
 */
static const char *nv3d0_states[] = {
    [NONE]   = "NONE",
    [SELECT] = "SELECT",
    [WINDOW] = "WINDOW",
    [READ]   = "READ",
    [WRITE]  = "WRITE",
};
418 typedef struct VFIONvidia3d0Quirk
{
420 VFIONvidia3d0State state
;
422 } VFIONvidia3d0Quirk
;
424 static uint64_t vfio_nvidia_3d4_quirk_read(void *opaque
,
425 hwaddr addr
, unsigned size
)
427 VFIONvidia3d0Quirk
*quirk
= opaque
;
428 VFIOPCIDevice
*vdev
= quirk
->vdev
;
432 return vfio_vga_read(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
],
436 static void vfio_nvidia_3d4_quirk_write(void *opaque
, hwaddr addr
,
437 uint64_t data
, unsigned size
)
439 VFIONvidia3d0Quirk
*quirk
= opaque
;
440 VFIOPCIDevice
*vdev
= quirk
->vdev
;
441 VFIONvidia3d0State old_state
= quirk
->state
;
447 if (old_state
== NONE
) {
448 quirk
->state
= SELECT
;
449 trace_vfio_quirk_nvidia_3d0_state(vdev
->vbasedev
.name
,
450 nv3d0_states
[quirk
->state
]);
454 if (old_state
== WINDOW
) {
456 trace_vfio_quirk_nvidia_3d0_state(vdev
->vbasedev
.name
,
457 nv3d0_states
[quirk
->state
]);
461 if (old_state
== WINDOW
) {
462 quirk
->state
= WRITE
;
463 trace_vfio_quirk_nvidia_3d0_state(vdev
->vbasedev
.name
,
464 nv3d0_states
[quirk
->state
]);
469 vfio_vga_write(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
],
470 addr
+ 0x14, data
, size
);
473 static const MemoryRegionOps vfio_nvidia_3d4_quirk
= {
474 .read
= vfio_nvidia_3d4_quirk_read
,
475 .write
= vfio_nvidia_3d4_quirk_write
,
476 .endianness
= DEVICE_LITTLE_ENDIAN
,
479 static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque
,
480 hwaddr addr
, unsigned size
)
482 VFIONvidia3d0Quirk
*quirk
= opaque
;
483 VFIOPCIDevice
*vdev
= quirk
->vdev
;
484 VFIONvidia3d0State old_state
= quirk
->state
;
485 uint64_t data
= vfio_vga_read(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
],
490 if (old_state
== READ
&&
491 (quirk
->offset
& ~(PCI_CONFIG_SPACE_SIZE
- 1)) == 0x1800) {
492 uint8_t offset
= quirk
->offset
& (PCI_CONFIG_SPACE_SIZE
- 1);
494 data
= vfio_pci_read_config(&vdev
->pdev
, offset
, size
);
495 trace_vfio_quirk_nvidia_3d0_read(vdev
->vbasedev
.name
,
502 static void vfio_nvidia_3d0_quirk_write(void *opaque
, hwaddr addr
,
503 uint64_t data
, unsigned size
)
505 VFIONvidia3d0Quirk
*quirk
= opaque
;
506 VFIOPCIDevice
*vdev
= quirk
->vdev
;
507 VFIONvidia3d0State old_state
= quirk
->state
;
511 if (old_state
== SELECT
) {
512 quirk
->offset
= (uint32_t)data
;
513 quirk
->state
= WINDOW
;
514 trace_vfio_quirk_nvidia_3d0_state(vdev
->vbasedev
.name
,
515 nv3d0_states
[quirk
->state
]);
516 } else if (old_state
== WRITE
) {
517 if ((quirk
->offset
& ~(PCI_CONFIG_SPACE_SIZE
- 1)) == 0x1800) {
518 uint8_t offset
= quirk
->offset
& (PCI_CONFIG_SPACE_SIZE
- 1);
520 vfio_pci_write_config(&vdev
->pdev
, offset
, data
, size
);
521 trace_vfio_quirk_nvidia_3d0_write(vdev
->vbasedev
.name
,
527 vfio_vga_write(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
],
528 addr
+ 0x10, data
, size
);
531 static const MemoryRegionOps vfio_nvidia_3d0_quirk
= {
532 .read
= vfio_nvidia_3d0_quirk_read
,
533 .write
= vfio_nvidia_3d0_quirk_write
,
534 .endianness
= DEVICE_LITTLE_ENDIAN
,
537 static void vfio_vga_probe_nvidia_3d0_quirk(VFIOPCIDevice
*vdev
)
540 VFIONvidia3d0Quirk
*data
;
542 if (!vfio_pci_is(vdev
, PCI_VENDOR_ID_NVIDIA
, PCI_ANY_ID
) ||
543 !vdev
->bars
[1].region
.size
) {
547 quirk
= g_malloc0(sizeof(*quirk
));
548 quirk
->data
= data
= g_malloc0(sizeof(*data
));
549 quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 2);
553 memory_region_init_io(&quirk
->mem
[0], OBJECT(vdev
), &vfio_nvidia_3d4_quirk
,
554 data
, "vfio-nvidia-3d4-quirk", 2);
555 memory_region_add_subregion(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
].mem
,
556 0x14 /* 0x3c0 + 0x14 */, &quirk
->mem
[0]);
558 memory_region_init_io(&quirk
->mem
[1], OBJECT(vdev
), &vfio_nvidia_3d0_quirk
,
559 data
, "vfio-nvidia-3d0-quirk", 2);
560 memory_region_add_subregion(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
].mem
,
561 0x10 /* 0x3c0 + 0x10 */, &quirk
->mem
[1]);
563 QLIST_INSERT_HEAD(&vdev
->vga
.region
[QEMU_PCI_VGA_IO_HI
].quirks
,
566 trace_vfio_quirk_nvidia_3d0_probe(vdev
->vbasedev
.name
);
570 * The second quirk is documented in envytools. The I/O port BAR5 is just
571 * a set of address/data ports to the MMIO BARs. The BAR we care about is
572 * again BAR0. This backdoor is apparently a bit newer than the one above
573 * so we need to not only trap 256 bytes @0x1800, but all of PCI config
574 * space, including extended space is available at the 4k @0x88000.
576 typedef struct VFIONvidiaBAR5Quirk
{
579 MemoryRegion
*addr_mem
;
580 MemoryRegion
*data_mem
;
582 VFIOConfigWindowQuirk window
; /* last for match data */
583 } VFIONvidiaBAR5Quirk
;
585 static void vfio_nvidia_bar5_enable(VFIONvidiaBAR5Quirk
*bar5
)
587 VFIOPCIDevice
*vdev
= bar5
->window
.vdev
;
589 if (((bar5
->master
& bar5
->enable
) & 0x1) == bar5
->enabled
) {
593 bar5
->enabled
= !bar5
->enabled
;
594 trace_vfio_quirk_nvidia_bar5_state(vdev
->vbasedev
.name
,
595 bar5
->enabled
? "Enable" : "Disable");
596 memory_region_set_enabled(bar5
->addr_mem
, bar5
->enabled
);
597 memory_region_set_enabled(bar5
->data_mem
, bar5
->enabled
);
600 static uint64_t vfio_nvidia_bar5_quirk_master_read(void *opaque
,
601 hwaddr addr
, unsigned size
)
603 VFIONvidiaBAR5Quirk
*bar5
= opaque
;
604 VFIOPCIDevice
*vdev
= bar5
->window
.vdev
;
606 return vfio_region_read(&vdev
->bars
[5].region
, addr
, size
);
609 static void vfio_nvidia_bar5_quirk_master_write(void *opaque
, hwaddr addr
,
610 uint64_t data
, unsigned size
)
612 VFIONvidiaBAR5Quirk
*bar5
= opaque
;
613 VFIOPCIDevice
*vdev
= bar5
->window
.vdev
;
615 vfio_region_write(&vdev
->bars
[5].region
, addr
, data
, size
);
618 vfio_nvidia_bar5_enable(bar5
);
621 static const MemoryRegionOps vfio_nvidia_bar5_quirk_master
= {
622 .read
= vfio_nvidia_bar5_quirk_master_read
,
623 .write
= vfio_nvidia_bar5_quirk_master_write
,
624 .endianness
= DEVICE_LITTLE_ENDIAN
,
627 static uint64_t vfio_nvidia_bar5_quirk_enable_read(void *opaque
,
628 hwaddr addr
, unsigned size
)
630 VFIONvidiaBAR5Quirk
*bar5
= opaque
;
631 VFIOPCIDevice
*vdev
= bar5
->window
.vdev
;
633 return vfio_region_read(&vdev
->bars
[5].region
, addr
+ 4, size
);
636 static void vfio_nvidia_bar5_quirk_enable_write(void *opaque
, hwaddr addr
,
637 uint64_t data
, unsigned size
)
639 VFIONvidiaBAR5Quirk
*bar5
= opaque
;
640 VFIOPCIDevice
*vdev
= bar5
->window
.vdev
;
642 vfio_region_write(&vdev
->bars
[5].region
, addr
+ 4, data
, size
);
645 vfio_nvidia_bar5_enable(bar5
);
648 static const MemoryRegionOps vfio_nvidia_bar5_quirk_enable
= {
649 .read
= vfio_nvidia_bar5_quirk_enable_read
,
650 .write
= vfio_nvidia_bar5_quirk_enable_write
,
651 .endianness
= DEVICE_LITTLE_ENDIAN
,
654 static void vfio_probe_nvidia_bar5_quirk(VFIOPCIDevice
*vdev
, int nr
)
657 VFIONvidiaBAR5Quirk
*bar5
;
658 VFIOConfigWindowQuirk
*window
;
660 if (!vfio_pci_is(vdev
, PCI_VENDOR_ID_NVIDIA
, PCI_ANY_ID
) ||
661 !vdev
->has_vga
|| nr
!= 5) {
665 quirk
= g_malloc0(sizeof(*quirk
));
666 quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 4);
668 bar5
= quirk
->data
= g_malloc0(sizeof(*bar5
) +
669 (sizeof(VFIOConfigWindowMatch
) * 2));
670 window
= &bar5
->window
;
673 window
->address_offset
= 0x8;
674 window
->data_offset
= 0xc;
675 window
->nr_matches
= 2;
676 window
->matches
[0].match
= 0x1800;
677 window
->matches
[0].mask
= PCI_CONFIG_SPACE_SIZE
- 1;
678 window
->matches
[1].match
= 0x88000;
679 window
->matches
[1].mask
= PCIE_CONFIG_SPACE_SIZE
- 1;
681 window
->addr_mem
= bar5
->addr_mem
= &quirk
->mem
[0];
682 window
->data_mem
= bar5
->data_mem
= &quirk
->mem
[1];
684 memory_region_init_io(window
->addr_mem
, OBJECT(vdev
),
685 &vfio_generic_window_address_quirk
, window
,
686 "vfio-nvidia-bar5-window-address-quirk", 4);
687 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
688 window
->address_offset
,
689 window
->addr_mem
, 1);
690 memory_region_set_enabled(window
->addr_mem
, false);
692 memory_region_init_io(window
->data_mem
, OBJECT(vdev
),
693 &vfio_generic_window_data_quirk
, window
,
694 "vfio-nvidia-bar5-window-data-quirk", 4);
695 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
697 window
->data_mem
, 1);
698 memory_region_set_enabled(window
->data_mem
, false);
700 memory_region_init_io(&quirk
->mem
[2], OBJECT(vdev
),
701 &vfio_nvidia_bar5_quirk_master
, bar5
,
702 "vfio-nvidia-bar5-master-quirk", 4);
703 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
704 0, &quirk
->mem
[2], 1);
706 memory_region_init_io(&quirk
->mem
[3], OBJECT(vdev
),
707 &vfio_nvidia_bar5_quirk_enable
, bar5
,
708 "vfio-nvidia-bar5-enable-quirk", 4);
709 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
710 4, &quirk
->mem
[3], 1);
712 QLIST_INSERT_HEAD(&vdev
->bars
[nr
].quirks
, quirk
, next
);
714 trace_vfio_quirk_nvidia_bar5_probe(vdev
->vbasedev
.name
);
718 * Finally, BAR0 itself. We want to redirect any accesses to either
719 * 0x1800 or 0x88000 through the PCI config space access functions.
721 static void vfio_nvidia_quirk_mirror_write(void *opaque
, hwaddr addr
,
722 uint64_t data
, unsigned size
)
724 VFIOConfigMirrorQuirk
*mirror
= opaque
;
725 VFIOPCIDevice
*vdev
= mirror
->vdev
;
726 PCIDevice
*pdev
= &vdev
->pdev
;
728 vfio_generic_quirk_mirror_write(opaque
, addr
, data
, size
);
731 * Nvidia seems to acknowledge MSI interrupts by writing 0xff to the
732 * MSI capability ID register. Both the ID and next register are
733 * read-only, so we allow writes covering either of those to real hw.
735 if ((pdev
->cap_present
& QEMU_PCI_CAP_MSI
) &&
736 vfio_range_contained(addr
, size
, pdev
->msi_cap
, PCI_MSI_FLAGS
)) {
737 vfio_region_write(&vdev
->bars
[mirror
->bar
].region
,
738 addr
+ mirror
->offset
, data
, size
);
739 trace_vfio_quirk_nvidia_bar0_msi_ack(vdev
->vbasedev
.name
);
743 static const MemoryRegionOps vfio_nvidia_mirror_quirk
= {
744 .read
= vfio_generic_quirk_mirror_read
,
745 .write
= vfio_nvidia_quirk_mirror_write
,
746 .endianness
= DEVICE_LITTLE_ENDIAN
,
749 static void vfio_probe_nvidia_bar0_quirk(VFIOPCIDevice
*vdev
, int nr
)
752 VFIOConfigMirrorQuirk
*mirror
;
754 if (!vfio_pci_is(vdev
, PCI_VENDOR_ID_NVIDIA
, PCI_ANY_ID
) ||
755 !vfio_is_vga(vdev
) || nr
!= 0) {
759 quirk
= g_malloc0(sizeof(*quirk
));
760 mirror
= quirk
->data
= g_malloc0(sizeof(*mirror
));
761 mirror
->mem
= quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 1);
764 mirror
->offset
= 0x88000;
767 memory_region_init_io(mirror
->mem
, OBJECT(vdev
),
768 &vfio_nvidia_mirror_quirk
, mirror
,
769 "vfio-nvidia-bar0-88000-mirror-quirk",
770 PCIE_CONFIG_SPACE_SIZE
);
771 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
772 mirror
->offset
, mirror
->mem
, 1);
774 QLIST_INSERT_HEAD(&vdev
->bars
[nr
].quirks
, quirk
, next
);
776 /* The 0x1800 offset mirror only seems to get used by legacy VGA */
778 quirk
= g_malloc0(sizeof(*quirk
));
779 mirror
= quirk
->data
= g_malloc0(sizeof(*mirror
));
780 mirror
->mem
= quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 1);
783 mirror
->offset
= 0x1800;
786 memory_region_init_io(mirror
->mem
, OBJECT(vdev
),
787 &vfio_nvidia_mirror_quirk
, mirror
,
788 "vfio-nvidia-bar0-1800-mirror-quirk",
789 PCI_CONFIG_SPACE_SIZE
);
790 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
791 mirror
->offset
, mirror
->mem
, 1);
793 QLIST_INSERT_HEAD(&vdev
->bars
[nr
].quirks
, quirk
, next
);
796 trace_vfio_quirk_nvidia_bar0_probe(vdev
->vbasedev
.name
);
800 * TODO - Some Nvidia devices provide config access to their companion HDA
801 * device and even to their parent bridge via these config space mirrors.
802 * Add quirks for those regions.
805 #define PCI_VENDOR_ID_REALTEK 0x10ec
808 * RTL8168 devices have a backdoor that can access the MSI-X table. At BAR2
809 * offset 0x70 there is a dword data register, offset 0x74 is a dword address
810 * register. According to the Linux r8169 driver, the MSI-X table is addressed
811 * when the "type" portion of the address register is set to 0x1. This appears
812 * to be bits 16:30. Bit 31 is both a write indicator and some sort of
813 * "address latched" indicator. Bits 12:15 are a mask field, which we can
814 * ignore because the MSI-X table should always be accessed as a dword (full
815 * mask). Bits 0:11 is offset within the type.
819 * Read from MSI-X table offset 0
820 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x1f000, 4) // store read addr
821 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x8001f000 // latch
822 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x70, 4) = 0xfee00398 // read data
824 * Write 0xfee00000 to MSI-X table offset 0
825 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x70, 0xfee00000, 4) // write data
826 * vfio: vfio_bar_write(0000:05:00.0:BAR2+0x74, 0x8001f000, 4) // do write
827 * vfio: vfio_bar_read(0000:05:00.0:BAR2+0x74, 4) = 0x1f000 // complete
829 typedef struct VFIOrtl8168Quirk
{
836 static uint64_t vfio_rtl8168_quirk_address_read(void *opaque
,
837 hwaddr addr
, unsigned size
)
839 VFIOrtl8168Quirk
*rtl
= opaque
;
840 VFIOPCIDevice
*vdev
= rtl
->vdev
;
841 uint64_t data
= vfio_region_read(&vdev
->bars
[2].region
, addr
+ 0x74, size
);
844 data
= rtl
->addr
^ 0x80000000U
; /* latch/complete */
845 trace_vfio_quirk_rtl8168_fake_latch(vdev
->vbasedev
.name
, data
);
851 static void vfio_rtl8168_quirk_address_write(void *opaque
, hwaddr addr
,
852 uint64_t data
, unsigned size
)
854 VFIOrtl8168Quirk
*rtl
= opaque
;
855 VFIOPCIDevice
*vdev
= rtl
->vdev
;
857 rtl
->enabled
= false;
859 if ((data
& 0x7fff0000) == 0x10000) { /* MSI-X table */
861 rtl
->addr
= (uint32_t)data
;
863 if (data
& 0x80000000U
) { /* Do write */
864 if (vdev
->pdev
.cap_present
& QEMU_PCI_CAP_MSIX
) {
865 hwaddr offset
= data
& 0xfff;
866 uint64_t val
= rtl
->data
;
868 trace_vfio_quirk_rtl8168_msix_write(vdev
->vbasedev
.name
,
869 (uint16_t)offset
, val
);
871 /* Write to the proper guest MSI-X table instead */
872 memory_region_dispatch_write(&vdev
->pdev
.msix_table_mmio
,
874 MEMTXATTRS_UNSPECIFIED
);
876 return; /* Do not write guest MSI-X data to hardware */
880 vfio_region_write(&vdev
->bars
[2].region
, addr
+ 0x74, data
, size
);
883 static const MemoryRegionOps vfio_rtl_address_quirk
= {
884 .read
= vfio_rtl8168_quirk_address_read
,
885 .write
= vfio_rtl8168_quirk_address_write
,
887 .min_access_size
= 4,
888 .max_access_size
= 4,
891 .endianness
= DEVICE_LITTLE_ENDIAN
,
894 static uint64_t vfio_rtl8168_quirk_data_read(void *opaque
,
895 hwaddr addr
, unsigned size
)
897 VFIOrtl8168Quirk
*rtl
= opaque
;
898 VFIOPCIDevice
*vdev
= rtl
->vdev
;
899 uint64_t data
= vfio_region_read(&vdev
->bars
[2].region
, addr
+ 0x74, size
);
901 if (rtl
->enabled
&& (vdev
->pdev
.cap_present
& QEMU_PCI_CAP_MSIX
)) {
902 hwaddr offset
= rtl
->addr
& 0xfff;
903 memory_region_dispatch_read(&vdev
->pdev
.msix_table_mmio
, offset
,
904 &data
, size
, MEMTXATTRS_UNSPECIFIED
);
905 trace_vfio_quirk_rtl8168_msix_read(vdev
->vbasedev
.name
, offset
, data
);
911 static void vfio_rtl8168_quirk_data_write(void *opaque
, hwaddr addr
,
912 uint64_t data
, unsigned size
)
914 VFIOrtl8168Quirk
*rtl
= opaque
;
915 VFIOPCIDevice
*vdev
= rtl
->vdev
;
917 rtl
->data
= (uint32_t)data
;
919 vfio_region_write(&vdev
->bars
[2].region
, addr
+ 0x70, data
, size
);
922 static const MemoryRegionOps vfio_rtl_data_quirk
= {
923 .read
= vfio_rtl8168_quirk_data_read
,
924 .write
= vfio_rtl8168_quirk_data_write
,
926 .min_access_size
= 4,
927 .max_access_size
= 4,
930 .endianness
= DEVICE_LITTLE_ENDIAN
,
933 static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice
*vdev
, int nr
)
936 VFIOrtl8168Quirk
*rtl
;
938 if (!vfio_pci_is(vdev
, PCI_VENDOR_ID_REALTEK
, 0x8168) || nr
!= 2) {
942 quirk
= g_malloc0(sizeof(*quirk
));
943 quirk
->mem
= g_malloc0_n(sizeof(MemoryRegion
), 2);
945 quirk
->data
= rtl
= g_malloc0(sizeof(*rtl
));
948 memory_region_init_io(&quirk
->mem
[0], OBJECT(vdev
),
949 &vfio_rtl_address_quirk
, rtl
,
950 "vfio-rtl8168-window-address-quirk", 4);
951 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
952 0x74, &quirk
->mem
[0], 1);
954 memory_region_init_io(&quirk
->mem
[1], OBJECT(vdev
),
955 &vfio_rtl_data_quirk
, rtl
,
956 "vfio-rtl8168-window-data-quirk", 4);
957 memory_region_add_subregion_overlap(&vdev
->bars
[nr
].region
.mem
,
958 0x70, &quirk
->mem
[1], 1);
960 QLIST_INSERT_HEAD(&vdev
->bars
[nr
].quirks
, quirk
, next
);
962 trace_vfio_quirk_rtl8168_probe(vdev
->vbasedev
.name
);
966 * Common quirk probe entry points.
968 void vfio_vga_quirk_setup(VFIOPCIDevice
*vdev
)
970 vfio_vga_probe_ati_3c3_quirk(vdev
);
971 vfio_vga_probe_nvidia_3d0_quirk(vdev
);
974 void vfio_vga_quirk_teardown(VFIOPCIDevice
*vdev
)
979 for (i
= 0; i
< ARRAY_SIZE(vdev
->vga
.region
); i
++) {
980 QLIST_FOREACH(quirk
, &vdev
->vga
.region
[i
].quirks
, next
) {
981 for (j
= 0; j
< quirk
->nr_mem
; j
++) {
982 memory_region_del_subregion(&vdev
->vga
.region
[i
].mem
,
989 void vfio_vga_quirk_free(VFIOPCIDevice
*vdev
)
993 for (i
= 0; i
< ARRAY_SIZE(vdev
->vga
.region
); i
++) {
994 while (!QLIST_EMPTY(&vdev
->vga
.region
[i
].quirks
)) {
995 VFIOQuirk
*quirk
= QLIST_FIRST(&vdev
->vga
.region
[i
].quirks
);
996 QLIST_REMOVE(quirk
, next
);
997 for (j
= 0; j
< quirk
->nr_mem
; j
++) {
998 object_unparent(OBJECT(&quirk
->mem
[j
]));
1001 g_free(quirk
->data
);
1007 void vfio_bar_quirk_setup(VFIOPCIDevice
*vdev
, int nr
)
1009 vfio_probe_ati_bar4_quirk(vdev
, nr
);
1010 vfio_probe_ati_bar2_quirk(vdev
, nr
);
1011 vfio_probe_nvidia_bar5_quirk(vdev
, nr
);
1012 vfio_probe_nvidia_bar0_quirk(vdev
, nr
);
1013 vfio_probe_rtl8168_bar2_quirk(vdev
, nr
);
1016 void vfio_bar_quirk_teardown(VFIOPCIDevice
*vdev
, int nr
)
1018 VFIOBAR
*bar
= &vdev
->bars
[nr
];
1022 QLIST_FOREACH(quirk
, &bar
->quirks
, next
) {
1023 for (i
= 0; i
< quirk
->nr_mem
; i
++) {
1024 memory_region_del_subregion(&bar
->region
.mem
, &quirk
->mem
[i
]);
1029 void vfio_bar_quirk_free(VFIOPCIDevice
*vdev
, int nr
)
1031 VFIOBAR
*bar
= &vdev
->bars
[nr
];
1034 while (!QLIST_EMPTY(&bar
->quirks
)) {
1035 VFIOQuirk
*quirk
= QLIST_FIRST(&bar
->quirks
);
1036 QLIST_REMOVE(quirk
, next
);
1037 for (i
= 0; i
< quirk
->nr_mem
; i
++) {
1038 object_unparent(OBJECT(&quirk
->mem
[i
]));
1041 g_free(quirk
->data
);
1051 * AMD Radeon PCI config reset, based on Linux:
1052 * drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
1053 * drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
1054 * drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
1055 * drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
1056 * IDs: include/drm/drm_pciids.h
1057 * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
1059 * Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the
1060 * hardware that should be fixed on future ASICs. The symptom of this is that
1061 * once the accelerated driver loads, Windows guests will bsod on subsequent
1062 * attempts to load the driver, such as after VM reset or shutdown/restart. To
1063 * work around this, we do an AMD specific PCI config reset, followed by an SMC
1064 * reset. The PCI config reset only works if SMC firmware is running, so we
1065 * have a dependency on the state of the device as to whether this reset will
1066 * be effective. There are still cases where we won't be able to kick the
1067 * device into working, but this greatly improves the usability overall. The
1068 * config reset magic is relatively common on AMD GPUs, but the setup and SMC
1069 * poking is largely ASIC specific.
/*
 * Report whether the device state looks like "SMC running".
 *
 * Uses the BAR5 index/data register pair (0x200 / 0x204) to read two
 * indirect registers: index 0x80000004 into `clk` and index 0x80000370
 * into `pc_c`.  Returns true only when bit 0 of `clk` is clear AND `pc_c`
 * is at least 0x20100.
 *
 * NOTE(review): judging by the variable names and the "Disable SMC clock"
 * access to the same index in vfio_radeon_reset(), `clk` is presumably a
 * clock-control word and `pc_c` a program counter -- confirm against the
 * Linux ci_smc.c reference.  The declarations of `clk` and `pc_c` are not
 * visible in this excerpt.
 */
1071 static bool vfio_radeon_smc_is_running(VFIOPCIDevice
*vdev
)
1076 * Registers 200h and 204h are index and data registers for accessing
1077 * indirect configuration registers within the device.
1079 vfio_region_write(&vdev
->bars
[5].region
, 0x200, 0x80000004, 4);
1080 clk
= vfio_region_read(&vdev
->bars
[5].region
, 0x204, 4);
1081 vfio_region_write(&vdev
->bars
[5].region
, 0x200, 0x80000370, 4);
1082 pc_c
= vfio_region_read(&vdev
->bars
[5].region
, 0x204, 4);
1084 return (!(clk
& 1) && (0x20100 <= pc_c
));
1088 * The scope of a config reset is controlled by a mode bit in the misc register
1089 * and a fuse, exposed as a bit in another register. The fuse is the default
1090 * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
1091 * scope = !(misc ^ fuse), where the resulting scope is defined the same as
1092 * the fuse. A truth table therefore tells us that if misc == fuse, we need
1093 * to flip the value of the bit in the misc register.
/*
 * Adjust the misc register so that a config reset covers only GFX.
 *
 * Through the BAR5 index/data pair (0x200 / 0x204) this reads the fuse
 * word (index 0xc00c0000) and the misc word (index 0xc0000010), then
 * writes misc back with bit 1 toggled (misc ^ 2) and reads the data
 * register once more to flush the write.
 *
 * NOTE(review): the test that decides whether the toggle is applied, and
 * which bit of `fuse` is consulted, are not visible in this excerpt.  Per
 * the truth-table comment preceding this function, the flip should happen
 * only when the misc and fuse bits are equal -- confirm.
 */
1095 static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice
*vdev
)
1097 uint32_t misc
, fuse
;
1100 vfio_region_write(&vdev
->bars
[5].region
, 0x200, 0xc00c0000, 4);
1101 fuse
= vfio_region_read(&vdev
->bars
[5].region
, 0x204, 4);
/* The index register is left at 0xc0000010, so the writes below hit misc. */
1104 vfio_region_write(&vdev
->bars
[5].region
, 0x200, 0xc0000010, 4);
1105 misc
= vfio_region_read(&vdev
->bars
[5].region
, 0x204, 4);
1109 vfio_region_write(&vdev
->bars
[5].region
, 0x204, misc
^ 2, 4);
1110 vfio_region_read(&vdev
->bars
[5].region
, 0x204, 4); /* flush */
/*
 * Device-specific reset for AMD Radeon GPUs (installed as vdev->resetfn by
 * vfio_setup_resetfn_quirk()).
 *
 * Visible sequence:
 *   1. If vdev->vbasedev.reset_works is set, trace "skipped" and leave the
 *      reset to the kernel.
 *   2. Write PCI_COMMAND_MEMORY to the PCI command register.
 *   3. If vfio_radeon_smc_is_running() is false, trace "no_smc".
 *   4. Call vfio_radeon_set_gfx_only_reset().
 *   5. Write the magic value 0x39d5e86b to config offset 0x7c.
 *   6. Poll BAR5 offset 0x5428 up to 100000 times until it reads something
 *      other than 0xffffffff; trace "timeout" otherwise.
 *   7. Read and write back indirect registers 0x80000000 and 0x80000004
 *      through the BAR5 index/data pair, then trace "done".
 *   8. Write 0 to the PCI command register.
 *
 * NOTE(review): return statements, error codes, goto labels, any delays
 * between polls, and the declarations of `i` and `data` are not visible in
 * this excerpt, so the exact return values and the control flow between
 * the steps above must be confirmed against the full file.
 */
1114 static int vfio_radeon_reset(VFIOPCIDevice
*vdev
)
1116 PCIDevice
*pdev
= &vdev
->pdev
;
1120 /* Defer to a kernel implemented reset */
1121 if (vdev
->vbasedev
.reset_works
) {
1122 trace_vfio_quirk_ati_bonaire_reset_skipped(vdev
->vbasedev
.name
);
1126 /* Enable only memory BAR access */
1127 vfio_pci_write_config(pdev
, PCI_COMMAND
, PCI_COMMAND_MEMORY
, 2);
1129 /* Reset only works if SMC firmware is loaded and running */
1130 if (!vfio_radeon_smc_is_running(vdev
)) {
1132 trace_vfio_quirk_ati_bonaire_reset_no_smc(vdev
->vbasedev
.name
);
1136 /* Make sure only the GFX function is reset */
1137 vfio_radeon_set_gfx_only_reset(vdev
);
1139 /* AMD PCI config reset */
1140 vfio_pci_write_config(pdev
, 0x7c, 0x39d5e86b, 4);
1143 /* Read back the memory size to make sure we're out of reset */
1144 for (i
= 0; i
< 100000; i
++) {
1145 if (vfio_region_read(&vdev
->bars
[5].region
, 0x5428, 4) != 0xffffffff) {
1151 trace_vfio_quirk_ati_bonaire_reset_timeout(vdev
->vbasedev
.name
);
/*
 * Select indirect register 0x80000000, read it and write the value back.
 * NOTE(review): any modification of `data` between the read and the
 * write-back is not visible in this excerpt.
 */
1155 vfio_region_write(&vdev
->bars
[5].region
, 0x200, 0x80000000, 4);
1156 data
= vfio_region_read(&vdev
->bars
[5].region
, 0x204, 4);
1158 vfio_region_write(&vdev
->bars
[5].region
, 0x204, data
, 4);
1160 /* Disable SMC clock */
1161 vfio_region_write(&vdev
->bars
[5].region
, 0x200, 0x80000004, 4);
1162 data
= vfio_region_read(&vdev
->bars
[5].region
, 0x204, 4);
1164 vfio_region_write(&vdev
->bars
[5].region
, 0x204, data
, 4);
1166 trace_vfio_quirk_ati_bonaire_reset_done(vdev
->vbasedev
.name
);
/* Note: the value written is a literal 0, not a previously saved command word. */
1169 /* Restore PCI command register */
1170 vfio_pci_write_config(pdev
, PCI_COMMAND
, 0, 2);
/*
 * Select a device-specific reset function for @vdev, if one is known.
 *
 * Dispatches on vdev->vendor_id and then vdev->device_id.  For the Bonaire
 * and Hawaii device IDs listed below it sets vdev->resetfn to
 * vfio_radeon_reset and emits a trace event.
 *
 * NOTE(review): the vendor case label and any device IDs on lines without
 * a trailing comment are not visible in this excerpt; the list shown here
 * may therefore be incomplete.
 */
1175 void vfio_setup_resetfn_quirk(VFIOPCIDevice
*vdev
)
1177 switch (vdev
->vendor_id
) {
1179 switch (vdev
->device_id
) {
1181 case 0x6649: /* Bonaire [FirePro W5100] */
1184 case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
1185 case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
1186 case 0x665d: /* Bonaire [Radeon R7 200 Series] */
1188 case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
1189 case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
1194 case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
1195 case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
1200 vdev
->resetfn
= vfio_radeon_reset
;
1201 trace_vfio_quirk_ati_bonaire_reset(vdev
->vbasedev
.name
);