/*
 * low level and IOMMU backend agnostic helpers used by VFIO devices,
 * related to regions, interrupts, capabilities
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on qemu-kvm device-assignment:
 *  Adapted for KVM by Qumranet.
 *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
 *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
 *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
 *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
 *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
 */
22 #include "qemu/osdep.h"
23 #include <sys/ioctl.h>
25 #include "hw/vfio/vfio-common.h"
28 #include "qapi/error.h"
29 #include "qemu/error-report.h"
30 #include "monitor/monitor.h"
33 * Common VFIO interrupt disable
35 void vfio_disable_irqindex(VFIODevice
*vbasedev
, int index
)
37 struct vfio_irq_set irq_set
= {
38 .argsz
= sizeof(irq_set
),
39 .flags
= VFIO_IRQ_SET_DATA_NONE
| VFIO_IRQ_SET_ACTION_TRIGGER
,
45 ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, &irq_set
);
48 void vfio_unmask_single_irqindex(VFIODevice
*vbasedev
, int index
)
50 struct vfio_irq_set irq_set
= {
51 .argsz
= sizeof(irq_set
),
52 .flags
= VFIO_IRQ_SET_DATA_NONE
| VFIO_IRQ_SET_ACTION_UNMASK
,
58 ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, &irq_set
);
61 void vfio_mask_single_irqindex(VFIODevice
*vbasedev
, int index
)
63 struct vfio_irq_set irq_set
= {
64 .argsz
= sizeof(irq_set
),
65 .flags
= VFIO_IRQ_SET_DATA_NONE
| VFIO_IRQ_SET_ACTION_MASK
,
71 ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, &irq_set
);
/*
 * Map a VFIO_IRQ_SET_ACTION_* value to a short name for error messages.
 * Any value that is not exactly one of the three known actions yields
 * "UNKNOWN ACTION".
 */
static inline const char *action_to_str(int action)
{
    switch (action) {
    case VFIO_IRQ_SET_ACTION_MASK:
        return "MASK";
    case VFIO_IRQ_SET_ACTION_UNMASK:
        return "UNMASK";
    case VFIO_IRQ_SET_ACTION_TRIGGER:
        return "TRIGGER";
    default:
        return "UNKNOWN ACTION";
    }
}
88 static const char *index_to_str(VFIODevice
*vbasedev
, int index
)
90 if (vbasedev
->type
!= VFIO_DEVICE_TYPE_PCI
) {
95 case VFIO_PCI_INTX_IRQ_INDEX
:
97 case VFIO_PCI_MSI_IRQ_INDEX
:
99 case VFIO_PCI_MSIX_IRQ_INDEX
:
101 case VFIO_PCI_ERR_IRQ_INDEX
:
103 case VFIO_PCI_REQ_IRQ_INDEX
:
110 int vfio_set_irq_signaling(VFIODevice
*vbasedev
, int index
, int subindex
,
111 int action
, int fd
, Error
**errp
)
113 struct vfio_irq_set
*irq_set
;
118 argsz
= sizeof(*irq_set
) + sizeof(*pfd
);
120 irq_set
= g_malloc0(argsz
);
121 irq_set
->argsz
= argsz
;
122 irq_set
->flags
= VFIO_IRQ_SET_DATA_EVENTFD
| action
;
123 irq_set
->index
= index
;
124 irq_set
->start
= subindex
;
126 pfd
= (int32_t *)&irq_set
->data
;
129 if (ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, irq_set
)) {
138 error_setg_errno(errp
, -ret
, "VFIO_DEVICE_SET_IRQS failure");
140 name
= index_to_str(vbasedev
, index
);
142 error_prepend(errp
, "%s-%d: ", name
, subindex
);
144 error_prepend(errp
, "index %d-%d: ", index
, subindex
);
147 "Failed to %s %s eventfd signaling for interrupt ",
148 fd
< 0 ? "tear down" : "set up", action_to_str(action
));
153 * IO Port/MMIO - Beware of the endians, VFIO is always little endian
155 void vfio_region_write(void *opaque
, hwaddr addr
,
156 uint64_t data
, unsigned size
)
158 VFIORegion
*region
= opaque
;
159 VFIODevice
*vbasedev
= region
->vbasedev
;
172 buf
.word
= cpu_to_le16(data
);
175 buf
.dword
= cpu_to_le32(data
);
178 buf
.qword
= cpu_to_le64(data
);
181 hw_error("vfio: unsupported write size, %u bytes", size
);
185 if (pwrite(vbasedev
->fd
, &buf
, size
, region
->fd_offset
+ addr
) != size
) {
186 error_report("%s(%s:region%d+0x%"HWADDR_PRIx
", 0x%"PRIx64
188 __func__
, vbasedev
->name
, region
->nr
,
192 trace_vfio_region_write(vbasedev
->name
, region
->nr
, addr
, data
, size
);
195 * A read or write to a BAR always signals an INTx EOI. This will
196 * do nothing if not pending (including not in INTx mode). We assume
197 * that a BAR access is in response to an interrupt and that BAR
198 * accesses will service the interrupt. Unfortunately, we don't know
199 * which access will service the interrupt, so we're potentially
200 * getting quite a few host interrupts per guest interrupt.
202 vbasedev
->ops
->vfio_eoi(vbasedev
);
205 uint64_t vfio_region_read(void *opaque
,
206 hwaddr addr
, unsigned size
)
208 VFIORegion
*region
= opaque
;
209 VFIODevice
*vbasedev
= region
->vbasedev
;
218 if (pread(vbasedev
->fd
, &buf
, size
, region
->fd_offset
+ addr
) != size
) {
219 error_report("%s(%s:region%d+0x%"HWADDR_PRIx
", %d) failed: %m",
220 __func__
, vbasedev
->name
, region
->nr
,
229 data
= le16_to_cpu(buf
.word
);
232 data
= le32_to_cpu(buf
.dword
);
235 data
= le64_to_cpu(buf
.qword
);
238 hw_error("vfio: unsupported read size, %u bytes", size
);
242 trace_vfio_region_read(vbasedev
->name
, region
->nr
, addr
, size
, data
);
244 /* Same as write above */
245 vbasedev
->ops
->vfio_eoi(vbasedev
);
250 const MemoryRegionOps vfio_region_ops
= {
251 .read
= vfio_region_read
,
252 .write
= vfio_region_write
,
253 .endianness
= DEVICE_LITTLE_ENDIAN
,
255 .min_access_size
= 1,
256 .max_access_size
= 8,
259 .min_access_size
= 1,
260 .max_access_size
= 8,
264 int vfio_bitmap_alloc(VFIOBitmap
*vbmap
, hwaddr size
)
266 vbmap
->pages
= REAL_HOST_PAGE_ALIGN(size
) / qemu_real_host_page_size();
267 vbmap
->size
= ROUND_UP(vbmap
->pages
, sizeof(__u64
) * BITS_PER_BYTE
) /
269 vbmap
->bitmap
= g_try_malloc0(vbmap
->size
);
270 if (!vbmap
->bitmap
) {
/*
 * Walk a VFIO capability chain and return the first header whose id is @id.
 *
 * @ptr:        start of the info structure the offsets are relative to
 * @cap_offset: offset of the first capability header from @ptr
 *
 * Each header's ->next is an offset from @ptr; an offset of 0 (the walk
 * arriving back at @ptr) ends the chain.  Returns NULL if nothing matches.
 */
struct vfio_info_cap_header *
vfio_get_cap(void *ptr, uint32_t cap_offset, uint16_t id)
{
    struct vfio_info_cap_header *hdr;

    for (hdr = ptr + cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
        if (hdr->id == id) {
            return hdr;
        }
    }

    return NULL;
}
291 struct vfio_info_cap_header
*
292 vfio_get_region_info_cap(struct vfio_region_info
*info
, uint16_t id
)
294 if (!(info
->flags
& VFIO_REGION_INFO_FLAG_CAPS
)) {
298 return vfio_get_cap((void *)info
, info
->cap_offset
, id
);
301 struct vfio_info_cap_header
*
302 vfio_get_device_info_cap(struct vfio_device_info
*info
, uint16_t id
)
304 if (!(info
->flags
& VFIO_DEVICE_FLAGS_CAPS
)) {
308 return vfio_get_cap((void *)info
, info
->cap_offset
, id
);
311 static int vfio_setup_region_sparse_mmaps(VFIORegion
*region
,
312 struct vfio_region_info
*info
)
314 struct vfio_info_cap_header
*hdr
;
315 struct vfio_region_info_cap_sparse_mmap
*sparse
;
318 hdr
= vfio_get_region_info_cap(info
, VFIO_REGION_INFO_CAP_SPARSE_MMAP
);
323 sparse
= container_of(hdr
, struct vfio_region_info_cap_sparse_mmap
, header
);
325 trace_vfio_region_sparse_mmap_header(region
->vbasedev
->name
,
326 region
->nr
, sparse
->nr_areas
);
328 region
->mmaps
= g_new0(VFIOMmap
, sparse
->nr_areas
);
330 for (i
= 0, j
= 0; i
< sparse
->nr_areas
; i
++) {
331 if (sparse
->areas
[i
].size
) {
332 trace_vfio_region_sparse_mmap_entry(i
, sparse
->areas
[i
].offset
,
333 sparse
->areas
[i
].offset
+
334 sparse
->areas
[i
].size
- 1);
335 region
->mmaps
[j
].offset
= sparse
->areas
[i
].offset
;
336 region
->mmaps
[j
].size
= sparse
->areas
[i
].size
;
341 region
->nr_mmaps
= j
;
342 region
->mmaps
= g_realloc(region
->mmaps
, j
* sizeof(VFIOMmap
));
347 int vfio_region_setup(Object
*obj
, VFIODevice
*vbasedev
, VFIORegion
*region
,
348 int index
, const char *name
)
350 struct vfio_region_info
*info
;
353 ret
= vfio_get_region_info(vbasedev
, index
, &info
);
358 region
->vbasedev
= vbasedev
;
359 region
->flags
= info
->flags
;
360 region
->size
= info
->size
;
361 region
->fd_offset
= info
->offset
;
365 region
->mem
= g_new0(MemoryRegion
, 1);
366 memory_region_init_io(region
->mem
, obj
, &vfio_region_ops
,
367 region
, name
, region
->size
);
369 if (!vbasedev
->no_mmap
&&
370 region
->flags
& VFIO_REGION_INFO_FLAG_MMAP
) {
372 ret
= vfio_setup_region_sparse_mmaps(region
, info
);
375 region
->nr_mmaps
= 1;
376 region
->mmaps
= g_new0(VFIOMmap
, region
->nr_mmaps
);
377 region
->mmaps
[0].offset
= 0;
378 region
->mmaps
[0].size
= region
->size
;
385 trace_vfio_region_setup(vbasedev
->name
, index
, name
,
386 region
->flags
, region
->fd_offset
, region
->size
);
390 static void vfio_subregion_unmap(VFIORegion
*region
, int index
)
392 trace_vfio_region_unmap(memory_region_name(®ion
->mmaps
[index
].mem
),
393 region
->mmaps
[index
].offset
,
394 region
->mmaps
[index
].offset
+
395 region
->mmaps
[index
].size
- 1);
396 memory_region_del_subregion(region
->mem
, ®ion
->mmaps
[index
].mem
);
397 munmap(region
->mmaps
[index
].mmap
, region
->mmaps
[index
].size
);
398 object_unparent(OBJECT(®ion
->mmaps
[index
].mem
));
399 region
->mmaps
[index
].mmap
= NULL
;
402 int vfio_region_mmap(VFIORegion
*region
)
411 prot
|= region
->flags
& VFIO_REGION_INFO_FLAG_READ
? PROT_READ
: 0;
412 prot
|= region
->flags
& VFIO_REGION_INFO_FLAG_WRITE
? PROT_WRITE
: 0;
414 for (i
= 0; i
< region
->nr_mmaps
; i
++) {
415 region
->mmaps
[i
].mmap
= mmap(NULL
, region
->mmaps
[i
].size
, prot
,
416 MAP_SHARED
, region
->vbasedev
->fd
,
418 region
->mmaps
[i
].offset
);
419 if (region
->mmaps
[i
].mmap
== MAP_FAILED
) {
422 trace_vfio_region_mmap_fault(memory_region_name(region
->mem
), i
,
424 region
->mmaps
[i
].offset
,
426 region
->mmaps
[i
].offset
+
427 region
->mmaps
[i
].size
- 1, ret
);
429 region
->mmaps
[i
].mmap
= NULL
;
431 for (i
--; i
>= 0; i
--) {
432 vfio_subregion_unmap(region
, i
);
438 name
= g_strdup_printf("%s mmaps[%d]",
439 memory_region_name(region
->mem
), i
);
440 memory_region_init_ram_device_ptr(®ion
->mmaps
[i
].mem
,
441 memory_region_owner(region
->mem
),
442 name
, region
->mmaps
[i
].size
,
443 region
->mmaps
[i
].mmap
);
445 memory_region_add_subregion(region
->mem
, region
->mmaps
[i
].offset
,
446 ®ion
->mmaps
[i
].mem
);
448 trace_vfio_region_mmap(memory_region_name(®ion
->mmaps
[i
].mem
),
449 region
->mmaps
[i
].offset
,
450 region
->mmaps
[i
].offset
+
451 region
->mmaps
[i
].size
- 1);
457 void vfio_region_unmap(VFIORegion
*region
)
465 for (i
= 0; i
< region
->nr_mmaps
; i
++) {
466 if (region
->mmaps
[i
].mmap
) {
467 vfio_subregion_unmap(region
, i
);
472 void vfio_region_exit(VFIORegion
*region
)
480 for (i
= 0; i
< region
->nr_mmaps
; i
++) {
481 if (region
->mmaps
[i
].mmap
) {
482 memory_region_del_subregion(region
->mem
, ®ion
->mmaps
[i
].mem
);
486 trace_vfio_region_exit(region
->vbasedev
->name
, region
->nr
);
489 void vfio_region_finalize(VFIORegion
*region
)
497 for (i
= 0; i
< region
->nr_mmaps
; i
++) {
498 if (region
->mmaps
[i
].mmap
) {
499 munmap(region
->mmaps
[i
].mmap
, region
->mmaps
[i
].size
);
500 object_unparent(OBJECT(®ion
->mmaps
[i
].mem
));
504 object_unparent(OBJECT(region
->mem
));
507 g_free(region
->mmaps
);
509 trace_vfio_region_finalize(region
->vbasedev
->name
, region
->nr
);
512 region
->mmaps
= NULL
;
513 region
->nr_mmaps
= 0;
519 void vfio_region_mmaps_set_enabled(VFIORegion
*region
, bool enabled
)
527 for (i
= 0; i
< region
->nr_mmaps
; i
++) {
528 if (region
->mmaps
[i
].mmap
) {
529 memory_region_set_enabled(®ion
->mmaps
[i
].mem
, enabled
);
533 trace_vfio_region_mmaps_set_enabled(memory_region_name(region
->mem
),
537 int vfio_get_region_info(VFIODevice
*vbasedev
, int index
,
538 struct vfio_region_info
**info
)
540 size_t argsz
= sizeof(struct vfio_region_info
);
542 *info
= g_malloc0(argsz
);
544 (*info
)->index
= index
;
546 (*info
)->argsz
= argsz
;
548 if (ioctl(vbasedev
->fd
, VFIO_DEVICE_GET_REGION_INFO
, *info
)) {
554 if ((*info
)->argsz
> argsz
) {
555 argsz
= (*info
)->argsz
;
556 *info
= g_realloc(*info
, argsz
);
564 int vfio_get_dev_region_info(VFIODevice
*vbasedev
, uint32_t type
,
565 uint32_t subtype
, struct vfio_region_info
**info
)
569 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
570 struct vfio_info_cap_header
*hdr
;
571 struct vfio_region_info_cap_type
*cap_type
;
573 if (vfio_get_region_info(vbasedev
, i
, info
)) {
577 hdr
= vfio_get_region_info_cap(*info
, VFIO_REGION_INFO_CAP_TYPE
);
583 cap_type
= container_of(hdr
, struct vfio_region_info_cap_type
, header
);
585 trace_vfio_get_dev_region(vbasedev
->name
, i
,
586 cap_type
->type
, cap_type
->subtype
);
588 if (cap_type
->type
== type
&& cap_type
->subtype
== subtype
) {
599 bool vfio_has_region_cap(VFIODevice
*vbasedev
, int region
, uint16_t cap_type
)
601 struct vfio_region_info
*info
= NULL
;
604 if (!vfio_get_region_info(vbasedev
, region
, &info
)) {
605 if (vfio_get_region_info_cap(info
, cap_type
)) {
614 int vfio_device_get_name(VFIODevice
*vbasedev
, Error
**errp
)
618 if (vbasedev
->fd
< 0) {
619 if (stat(vbasedev
->sysfsdev
, &st
) < 0) {
620 error_setg_errno(errp
, errno
, "no such host device");
621 error_prepend(errp
, VFIO_MSG_PREFIX
, vbasedev
->sysfsdev
);
624 /* User may specify a name, e.g: VFIO platform device */
625 if (!vbasedev
->name
) {
626 vbasedev
->name
= g_path_get_basename(vbasedev
->sysfsdev
);
629 if (!vbasedev
->iommufd
) {
630 error_setg(errp
, "Use FD passing only with iommufd backend");
634 * Give a name with fd so any function printing out vbasedev->name
637 if (!vbasedev
->name
) {
638 vbasedev
->name
= g_strdup_printf("VFIO_FD%d", vbasedev
->fd
);
645 void vfio_device_set_fd(VFIODevice
*vbasedev
, const char *str
, Error
**errp
)
647 int fd
= monitor_fd_param(monitor_cur(), str
, errp
);
650 error_prepend(errp
, "Could not parse remote object fd %s:", str
);