/*
 * low level and IOMMU backend agnostic helpers used by VFIO devices,
 * related to regions, interrupts, capabilities
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */
#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include "hw/vfio/vfio-common.h"
#include "hw/hw.h"
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "monitor/monitor.h"
/*
 * Common VFIO interrupt disable
 */
void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = index,
        .start = 0,
        .count = 0,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}
void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}
void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
{
    struct vfio_irq_set irq_set = {
        .argsz = sizeof(irq_set),
        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
        .index = index,
        .start = 0,
        .count = 1,
    };

    ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}
static inline const char *action_to_str(int action)
{
    switch (action) {
    case VFIO_IRQ_SET_ACTION_MASK:
        return "MASK";
    case VFIO_IRQ_SET_ACTION_UNMASK:
        return "UNMASK";
    case VFIO_IRQ_SET_ACTION_TRIGGER:
        return "TRIGGER";
    default:
        return "UNKNOWN ACTION";
    }
}
static const char *index_to_str(VFIODevice *vbasedev, int index)
{
    if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
        return NULL;
    }

    switch (index) {
    case VFIO_PCI_INTX_IRQ_INDEX:
        return "INTX";
    case VFIO_PCI_MSI_IRQ_INDEX:
        return "MSI";
    case VFIO_PCI_MSIX_IRQ_INDEX:
        return "MSIX";
    case VFIO_PCI_ERR_IRQ_INDEX:
        return "ERR";
    case VFIO_PCI_REQ_IRQ_INDEX:
        return "REQ";
    default:
        return NULL;
    }
}
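/*
 * Wire an eventfd to (or tear it down from) a single interrupt of a VFIO
 * device via VFIO_DEVICE_SET_IRQS.  'action' is one of the
 * VFIO_IRQ_SET_ACTION_* values, 'fd' is the eventfd to attach (or -1 to
 * detach), and a descriptive error is built up in errp on failure.
 * A typical caller passes the fd of an EventNotifier, e.g.
 * event_notifier_get_fd(&notifier), together with
 * VFIO_IRQ_SET_ACTION_TRIGGER.
 */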
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
                           int action, int fd, Error **errp)
{
    struct vfio_irq_set *irq_set;
    int argsz, ret = 0;
    const char *name;
    int32_t *pfd;

    argsz = sizeof(*irq_set) + sizeof(*pfd);

    irq_set = g_malloc0(argsz);
    irq_set->argsz = argsz;
    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | action;
    irq_set->index = index;
    irq_set->start = subindex;
    irq_set->count = 1;
    pfd = (int32_t *)&irq_set->data;
    *pfd = fd;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
        ret = -errno;
    }
    g_free(irq_set);

    if (!ret) {
        return 0;
    }

    error_setg_errno(errp, -ret, "VFIO_DEVICE_SET_IRQS failure");

    name = index_to_str(vbasedev, index);
    if (name) {
        error_prepend(errp, "%s-%d: ", name, subindex);
    } else {
        error_prepend(errp, "index %d-%d: ", index, subindex);
    }
    error_prepend(errp,
                  "Failed to %s %s eventfd signaling for interrupt ",
                  fd < 0 ? "tear down" : "set up", action_to_str(action));
    return ret;
}
/*
 * IO Port/MMIO - Beware of the endians, VFIO is always little endian
 */
void vfio_region_write(void *opaque, hwaddr addr,
                       uint64_t data, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;

    switch (size) {
    case 1:
        buf.byte = data;
        break;
    case 2:
        buf.word = cpu_to_le16(data);
        break;
    case 4:
        buf.dword = cpu_to_le32(data);
        break;
    case 8:
        buf.qword = cpu_to_le64(data);
        break;
    default:
        hw_error("vfio: unsupported write size, %u bytes", size);
        break;
    }

    if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
                     ",%d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, data, size);
    }

    trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);

    /*
     * A read or write to a BAR always signals an INTx EOI.  This will
     * do nothing if not pending (including not in INTx mode).  We assume
     * that a BAR access is in response to an interrupt and that BAR
     * accesses will service the interrupt.  Unfortunately, we don't know
     * which access will service the interrupt, so we're potentially
     * getting quite a few host interrupts per guest interrupt.
     */
    vbasedev->ops->vfio_eoi(vbasedev);
}
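/*
 * Reads mirror the write path above: data is fetched from the region file
 * descriptor with pread(), converted from little endian, and the access is
 * treated as an INTx EOI.  A failed read returns all ones.
 */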
uint64_t vfio_region_read(void *opaque,
                          hwaddr addr, unsigned size)
{
    VFIORegion *region = opaque;
    VFIODevice *vbasedev = region->vbasedev;
    union {
        uint8_t byte;
        uint16_t word;
        uint32_t dword;
        uint64_t qword;
    } buf;
    uint64_t data = 0;

    if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
        error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
                     __func__, vbasedev->name, region->nr,
                     addr, size);
        return (uint64_t)-1;
    }
    switch (size) {
    case 1:
        data = buf.byte;
        break;
    case 2:
        data = le16_to_cpu(buf.word);
        break;
    case 4:
        data = le32_to_cpu(buf.dword);
        break;
    case 8:
        data = le64_to_cpu(buf.qword);
        break;
    default:
        hw_error("vfio: unsupported read size, %u bytes", size);
        break;
    }

    trace_vfio_region_read(vbasedev->name, region->nr, addr, size, data);

    /* Same as write above */
    vbasedev->ops->vfio_eoi(vbasedev);

    return data;
}
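/*
 * MemoryRegionOps backing the slow (non-mmap) path for VFIO regions:
 * accesses of 1 to 8 bytes are forwarded to the region file descriptor
 * and are little endian on the VFIO side.
 */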
const MemoryRegionOps vfio_region_ops = {
    .read = vfio_region_read,
    .write = vfio_region_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};
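/*
 * Allocate a bitmap covering 'size' bytes of guest memory, one bit per host
 * page, with the bitmap length rounded up to a multiple of 64 bits as the
 * VFIO dirty tracking interfaces expect.
 */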
int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size)
{
    vbmap->pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
    vbmap->size = ROUND_UP(vbmap->pages, sizeof(__u64) * BITS_PER_BYTE) /
                                         BITS_PER_BYTE;
    vbmap->bitmap = g_try_malloc0(vbmap->size);
    if (!vbmap->bitmap) {
        return -ENOMEM;
    }

    return 0;
}
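/*
 * Walk the capability chain embedded in a vfio_*_info structure, starting
 * at cap_offset, and return the header whose id matches, or NULL if the
 * chain does not contain the requested capability.
 */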
struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
    struct vfio_info_cap_header *hdr;

    for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}
struct vfio_info_cap_header *
vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}
struct vfio_info_cap_header *
vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id)
{
    if (!(info->flags & VFIO_DEVICE_FLAGS_CAPS)) {
        return NULL;
    }

    return vfio_get_cap((void *)info, info->cap_offset, id);
}
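/*
 * If the kernel advertises a sparse mmap capability for this region, build
 * the region->mmaps array from the advertised areas, skipping zero-sized
 * ones.  Returns -ENODEV when the capability is absent so the caller can
 * fall back to mapping the whole region.
 */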
static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
                                          struct vfio_region_info *info)
{
    struct vfio_info_cap_header *hdr;
    struct vfio_region_info_cap_sparse_mmap *sparse;
    int i, j;

    hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP);
    if (!hdr) {
        return -ENODEV;
    }

    sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header);

    trace_vfio_region_sparse_mmap_header(region->vbasedev->name,
                                         region->nr, sparse->nr_areas);

    region->mmaps = g_new0(VFIOMmap, sparse->nr_areas);

    for (i = 0, j = 0; i < sparse->nr_areas; i++) {
        if (sparse->areas[i].size) {
            trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset,
                                                sparse->areas[i].offset +
                                                sparse->areas[i].size - 1);
            region->mmaps[j].offset = sparse->areas[i].offset;
            region->mmaps[j].size = sparse->areas[i].size;
            j++;
        }
    }

    region->nr_mmaps = j;
    region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap));

    return 0;
}
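/*
 * Query the kernel for region 'index', fill in the VFIORegion bookkeeping
 * (flags, size, offset into the device fd) and create the backing I/O
 * MemoryRegion.  Mmapable regions get either the sparse mmap layout or a
 * single mmap covering the whole region.
 */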
int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
                      int index, const char *name)
{
    struct vfio_region_info *info;
    int ret;

    ret = vfio_get_region_info(vbasedev, index, &info);
    if (ret) {
        return ret;
    }

    region->vbasedev = vbasedev;
    region->flags = info->flags;
    region->size = info->size;
    region->fd_offset = info->offset;
    region->nr = index;

    if (region->size) {
        region->mem = g_new0(MemoryRegion, 1);
        memory_region_init_io(region->mem, obj, &vfio_region_ops,
                              region, name, region->size);

        if (!vbasedev->no_mmap &&
            region->flags & VFIO_REGION_INFO_FLAG_MMAP) {

            ret = vfio_setup_region_sparse_mmaps(region, info);

            if (ret) {
                region->nr_mmaps = 1;
                region->mmaps = g_new0(VFIOMmap, region->nr_mmaps);
                region->mmaps[0].offset = 0;
                region->mmaps[0].size = region->size;
            }
        }
    }

    g_free(info);

    trace_vfio_region_setup(vbasedev->name, index, name,
                            region->flags, region->fd_offset, region->size);
    return 0;
}
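/*
 * Undo a single mmap'd subregion: remove it from the container
 * MemoryRegion, unmap the host mapping and drop the QOM child created
 * for it.
 */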
static void vfio_subregion_unmap(VFIORegion *region, int index)
{
    trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem),
                            region->mmaps[index].offset,
                            region->mmaps[index].offset +
                            region->mmaps[index].size - 1);
    memory_region_del_subregion(region->mem, &region->mmaps[index].mem);
    munmap(region->mmaps[index].mmap, region->mmaps[index].size);
    object_unparent(OBJECT(&region->mmaps[index].mem));
    region->mmaps[index].mmap = NULL;
}
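/*
 * mmap() every area described in region->mmaps directly from the device fd
 * and expose each mapping as a RAM-device MemoryRegion inside region->mem.
 * On failure, mappings created so far are torn down and -errno is returned.
 */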
int vfio_region_mmap(VFIORegion *region)
{
    int i, prot = 0;
    char *name;

    if (!region->mem) {
        return 0;
    }

    prot |= region->flags & VFIO_REGION_INFO_FLAG_READ ? PROT_READ : 0;
    prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;

    for (i = 0; i < region->nr_mmaps; i++) {
        region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
                                     MAP_SHARED, region->vbasedev->fd,
                                     region->fd_offset +
                                     region->mmaps[i].offset);
        if (region->mmaps[i].mmap == MAP_FAILED) {
            int ret = -errno;

            trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
                                         region->fd_offset +
                                         region->mmaps[i].offset,
                                         region->fd_offset +
                                         region->mmaps[i].offset +
                                         region->mmaps[i].size - 1, ret);

            region->mmaps[i].mmap = NULL;

            for (i--; i >= 0; i--) {
                vfio_subregion_unmap(region, i);
            }

            return ret;
        }

        name = g_strdup_printf("%s mmaps[%d]",
                               memory_region_name(region->mem), i);
        memory_region_init_ram_device_ptr(&region->mmaps[i].mem,
                                          memory_region_owner(region->mem),
                                          name, region->mmaps[i].size,
                                          region->mmaps[i].mmap);
        g_free(name);
        memory_region_add_subregion(region->mem, region->mmaps[i].offset,
                                    &region->mmaps[i].mem);

        trace_vfio_region_mmap(memory_region_name(&region->mmaps[i].mem),
                               region->mmaps[i].offset,
                               region->mmaps[i].offset +
                               region->mmaps[i].size - 1);
    }

    return 0;
}
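/*
 * Teardown helpers: vfio_region_unmap() drops only the host mappings (the
 * region can be mapped again later), vfio_region_exit() detaches the
 * subregions from the address space, and vfio_region_finalize() releases
 * everything including the MemoryRegion objects.
 */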
void vfio_region_unmap(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            vfio_subregion_unmap(region, i);
        }
    }
}
void vfio_region_exit(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_del_subregion(region->mem, &region->mmaps[i].mem);
        }
    }

    trace_vfio_region_exit(region->vbasedev->name, region->nr);
}
void vfio_region_finalize(VFIORegion *region)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            munmap(region->mmaps[i].mmap, region->mmaps[i].size);
            object_unparent(OBJECT(&region->mmaps[i].mem));
        }
    }

    object_unparent(OBJECT(region->mem));

    g_free(region->mem);
    g_free(region->mmaps);

    trace_vfio_region_finalize(region->vbasedev->name, region->nr);

    region->mem = NULL;
    region->mmaps = NULL;
    region->nr_mmaps = 0;
}
void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled)
{
    int i;

    if (!region->mem) {
        return;
    }

    for (i = 0; i < region->nr_mmaps; i++) {
        if (region->mmaps[i].mmap) {
            memory_region_set_enabled(&region->mmaps[i].mem, enabled);
        }
    }

    trace_vfio_region_mmaps_set_enabled(memory_region_name(region->mem),
                                        enabled);
}
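/*
 * VFIO_DEVICE_GET_REGION_INFO with the usual argsz handshake: if the kernel
 * reports that it needs a larger buffer (e.g. to return capability chains),
 * the structure is reallocated and the ioctl retried.  The caller owns the
 * returned buffer and must g_free() it.
 */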
int vfio_get_region_info(VFIODevice *vbasedev, int index,
                         struct vfio_region_info **info)
{
    size_t argsz = sizeof(struct vfio_region_info);

    *info = g_malloc0(argsz);

    (*info)->index = index;
retry:
    (*info)->argsz = argsz;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
        g_free(*info);
        *info = NULL;
        return -errno;
    }

    if ((*info)->argsz > argsz) {
        argsz = (*info)->argsz;
        *info = g_realloc(*info, argsz);

        goto retry;
    }

    return 0;
}
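/*
 * Scan all regions of the device for a device-specific region whose
 * VFIO_REGION_INFO_CAP_TYPE capability matches the requested type/subtype.
 * On success *info points at the matching region info; otherwise *info is
 * NULL and -ENODEV is returned.
 */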
int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
                             uint32_t subtype, struct vfio_region_info **info)
{
    int i;

    for (i = 0; i < vbasedev->num_regions; i++) {
        struct vfio_info_cap_header *hdr;
        struct vfio_region_info_cap_type *cap_type;

        if (vfio_get_region_info(vbasedev, i, info)) {
            continue;
        }

        hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
        if (!hdr) {
            g_free(*info);
            continue;
        }

        cap_type = container_of(hdr, struct vfio_region_info_cap_type, header);

        trace_vfio_get_dev_region(vbasedev->name, i,
                                  cap_type->type, cap_type->subtype);

        if (cap_type->type == type && cap_type->subtype == subtype) {
            return 0;
        }

        g_free(*info);
    }

    *info = NULL;
    return -ENODEV;
}
bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
    struct vfio_region_info *info = NULL;
    bool ret = false;

    if (!vfio_get_region_info(vbasedev, region, &info)) {
        if (vfio_get_region_info_cap(info, cap_type)) {
            ret = true;
        }
        g_free(info);
    }

    return ret;
}
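/*
 * Derive vbasedev->name, either from the sysfs path of the device or, when
 * the device was handed to QEMU as a pre-opened file descriptor (iommufd
 * backend only), from the fd number itself so log messages stay meaningful.
 */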
int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
{
    struct stat st;

    if (vbasedev->fd < 0) {
        if (stat(vbasedev->sysfsdev, &st) < 0) {
            error_setg_errno(errp, errno, "no such host device");
            error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
            return -errno;
        }
        /* User may specify a name, e.g: VFIO platform device */
        if (!vbasedev->name) {
            vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
        }
    } else {
        if (!vbasedev->iommufd) {
            error_setg(errp, "Use FD passing only with iommufd backend");
            return -EINVAL;
        }
        /*
         * Give a name with fd so any function printing out vbasedev->name
         * will not break.
         */
        if (!vbasedev->name) {
            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
        }
    }

    return 0;
}
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
{
    int fd = monitor_fd_param(monitor_cur(), str, errp);

    if (fd < 0) {
        error_prepend(errp, "Could not parse remote object fd %s:", str);
        return;
    }
    vbasedev->fd = fd;
}
void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
                      DeviceState *dev, bool ram_discard)
{
    vbasedev->type = type;
    vbasedev->ops = ops;
    vbasedev->dev = dev;
    vbasedev->fd = -1;

    vbasedev->ram_block_discard_allowed = ram_discard;
}