Merge tag 'v9.1.0'
[qemu/ar7.git] / hw / remote / vfio-user-obj.c
blob8dbafafb9e742b16afd68034b5036e12e52031fa
/**
 * QEMU vfio-user server object
 *
 * Copyright © 2022 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL-v2, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
/**
 * Usage: add options:
 *     -machine x-remote,vfio-user=on,auto-shutdown=on
 *     -device <PCI-device>,id=<pci-dev-id>
 *     -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
 *             device=<pci-dev-id>
 *
 * Note that x-vfio-user-server object must be used with x-remote machine only.
 * This server could only support PCI devices for now.
 *
 * type - SocketAddress type - presently "unix" alone is supported. Required
 *        option
 *
 * path - named unix socket, it will be created by the server. It is
 *        a required option
 *
 * device - id of a device on the server, a required option. PCI devices
 *          alone are supported presently.
 *
 * notes - x-vfio-user-server could block IO and monitor during the
 *         initialization phase.
 *
 *         When x-remote machine has the auto-shutdown property
 *         enabled (default), x-vfio-user-server terminates after the last
 *         client disconnects. Otherwise, it will continue running until
 *         explicitly killed.
 */
40 #include "qemu/osdep.h"
42 #include "qom/object.h"
43 #include "qom/object_interfaces.h"
44 #include "qemu/error-report.h"
45 #include "trace.h"
46 #include "sysemu/runstate.h"
47 #include "hw/boards.h"
48 #include "hw/remote/machine.h"
49 #include "qapi/error.h"
50 #include "qapi/qapi-visit-sockets.h"
51 #include "qapi/qapi-events-misc.h"
52 #include "qemu/notify.h"
53 #include "qemu/thread.h"
54 #include "qemu/main-loop.h"
55 #include "sysemu/sysemu.h"
56 #include "libvfio-user.h"
57 #include "hw/qdev-core.h"
58 #include "hw/pci/pci.h"
59 #include "qemu/timer.h"
60 #include "exec/memory.h"
61 #include "hw/pci/msi.h"
62 #include "hw/pci/msix.h"
63 #include "hw/remote/vfio-user-obj.h"
/* QOM type name under which the vfio-user server object is registered. */
#define TYPE_VFU_OBJECT "x-vfio-user-server"
/*
 * Declares the VfuObject / VfuObjectClass types together with the
 * VFU_OBJECT(), VFU_OBJECT_CLASS() and VFU_OBJECT_GET_CLASS() cast helpers
 * used throughout this file.
 */
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
/**
 * VFU_OBJECT_ERROR - reports an error message.
 *
 * If auto_shutdown is set, it aborts the machine on error. Otherwise,
 * it logs an error message without aborting. auto_shutdown is disabled
 * when the server serves clients from multiple VMs; as such, an error
 * from one VM shouldn't be able to disrupt other VM's services.
 *
 * The expansion is wrapped in do { } while (0) so that
 * "VFU_OBJECT_ERROR(...);" is exactly one statement and can be used safely
 * as the unbraced body of an if/else.
 *
 * @o: the VfuObject reporting the error (currently unused by the expansion)
 * @fmt: printf-style format string, followed by its arguments
 */
#define VFU_OBJECT_ERROR(o, fmt, ...)                                     \
    do {                                                                  \
        if (vfu_object_auto_shutdown()) {                                 \
            error_setg(&error_abort, (fmt), ## __VA_ARGS__);              \
        } else {                                                          \
            error_report((fmt), ## __VA_ARGS__);                          \
        }                                                                 \
    } while (0)
/* Class data shared by every x-vfio-user-server instance. */
struct VfuObjectClass {
    ObjectClass parent_class;

    /*
     * Number of live instances: incremented in instance_init, decremented
     * in instance_finalize, where reaching zero may request a shutdown.
     */
    unsigned int nr_devs;
};
/* Per-instance state of an x-vfio-user-server object. */
struct VfuObject {
    /* private */
    Object parent;

    /* Value of the "socket" property; its unix path is given to libvfio-user */
    SocketAddress *socket;

    /* Value of the "device" property: id of the PCI device being served */
    char *device;

    /* Error recorded in instance_init and reported by vfu_object_init_ctx() */
    Error *err;

    /* Notifier used to defer context creation until the machine is ready */
    Notifier machine_done;

    /* libvfio-user context; NULL until vfu_object_init_ctx() succeeds */
    vfu_ctx_t *vfu_ctx;

    /* PCI device looked up from @device; a reference is held while set */
    PCIDevice *pci_dev;

    /* Reason registered as an unplug blocker on @pci_dev */
    Error *unplug_blocker;

    /* Poll fd obtained from the context; -1 when no fd handler is installed */
    int vfu_poll_fd;

    /* Device MSI/MSI-X callbacks saved before this object overrides them */
    MSITriggerFunc *default_msi_trigger;
    MSIPrepareMessageFunc *default_msi_prepare_message;
    MSIxPrepareMessageFunc *default_msix_prepare_message;
};
116 static void vfu_object_init_ctx(VfuObject *o, Error **errp);
118 static bool vfu_object_auto_shutdown(void)
120 bool auto_shutdown = true;
121 Error *local_err = NULL;
123 if (!current_machine) {
124 return auto_shutdown;
127 auto_shutdown = object_property_get_bool(OBJECT(current_machine),
128 "auto-shutdown",
129 &local_err);
132 * local_err would be set if no such property exists - safe to ignore.
133 * Unlikely scenario as auto-shutdown is always defined for
134 * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with
135 * TYPE_REMOTE_MACHINE
137 if (local_err) {
138 auto_shutdown = true;
139 error_free(local_err);
142 return auto_shutdown;
145 static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
146 void *opaque, Error **errp)
148 VfuObject *o = VFU_OBJECT(obj);
150 if (o->vfu_ctx) {
151 error_setg(errp, "vfu: Unable to set socket property - server busy");
152 return;
155 qapi_free_SocketAddress(o->socket);
157 o->socket = NULL;
159 visit_type_SocketAddress(v, name, &o->socket, errp);
161 if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
162 error_setg(errp, "vfu: Unsupported socket type - %s",
163 SocketAddressType_str(o->socket->type));
164 qapi_free_SocketAddress(o->socket);
165 o->socket = NULL;
166 return;
169 trace_vfu_prop("socket", o->socket->u.q_unix.path);
171 vfu_object_init_ctx(o, errp);
174 static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
176 VfuObject *o = VFU_OBJECT(obj);
178 if (o->vfu_ctx) {
179 error_setg(errp, "vfu: Unable to set device property - server busy");
180 return;
183 g_free(o->device);
185 o->device = g_strdup(str);
187 trace_vfu_prop("device", str);
189 vfu_object_init_ctx(o, errp);
192 static void vfu_object_ctx_run(void *opaque)
194 VfuObject *o = opaque;
195 const char *vfu_id;
196 char *vfu_path, *pci_dev_path;
197 int ret = -1;
199 while (ret != 0) {
200 ret = vfu_run_ctx(o->vfu_ctx);
201 if (ret < 0) {
202 if (errno == EINTR) {
203 continue;
204 } else if (errno == ENOTCONN) {
205 vfu_id = object_get_canonical_path_component(OBJECT(o));
206 vfu_path = object_get_canonical_path(OBJECT(o));
207 g_assert(o->pci_dev);
208 pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
209 /* o->device is a required property and is non-NULL here */
210 g_assert(o->device);
211 qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
212 o->device, pci_dev_path);
213 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
214 o->vfu_poll_fd = -1;
215 object_unparent(OBJECT(o));
216 g_free(vfu_path);
217 g_free(pci_dev_path);
218 break;
219 } else {
220 VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
221 o->device, strerror(errno));
222 break;
228 static void vfu_object_attach_ctx(void *opaque)
230 VfuObject *o = opaque;
231 GPollFD pfds[1];
232 int ret;
234 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
236 pfds[0].fd = o->vfu_poll_fd;
237 pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
239 retry_attach:
240 ret = vfu_attach_ctx(o->vfu_ctx);
241 if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
243 * vfu_object_attach_ctx can block QEMU's main loop
244 * during attach - the monitor and other IO
245 * could be unresponsive during this time.
247 (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
248 goto retry_attach;
249 } else if (ret < 0) {
250 VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
251 o->device, strerror(errno));
252 return;
255 o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
256 if (o->vfu_poll_fd < 0) {
257 VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
258 return;
261 qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
264 static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
265 size_t count, loff_t offset,
266 const bool is_write)
268 VfuObject *o = vfu_get_private(vfu_ctx);
269 uint32_t pci_access_width = sizeof(uint32_t);
270 size_t bytes = count;
271 uint32_t val = 0;
272 char *ptr = buf;
273 int len;
276 * Writes to the BAR registers would trigger an update to the
277 * global Memory and IO AddressSpaces. But the remote device
278 * never uses the global AddressSpaces, therefore overlapping
279 * memory regions are not a problem
281 while (bytes > 0) {
282 len = (bytes > pci_access_width) ? pci_access_width : bytes;
283 if (is_write) {
284 val = ldn_le_p(ptr, len);
285 pci_host_config_write_common(o->pci_dev, offset,
286 pci_config_size(o->pci_dev),
287 val, len);
288 trace_vfu_cfg_write(offset, val);
289 } else {
290 val = pci_host_config_read_common(o->pci_dev, offset,
291 pci_config_size(o->pci_dev), len);
292 stn_le_p(ptr, len, val);
293 trace_vfu_cfg_read(offset, val);
295 offset += len;
296 ptr += len;
297 bytes -= len;
300 return count;
303 static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
305 VfuObject *o = vfu_get_private(vfu_ctx);
306 AddressSpace *dma_as = NULL;
307 MemoryRegion *subregion = NULL;
308 g_autofree char *name = NULL;
309 struct iovec *iov = &info->iova;
311 if (!info->vaddr) {
312 return;
315 name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
316 (uint64_t)info->vaddr);
318 subregion = g_new0(MemoryRegion, 1);
320 memory_region_init_ram_ptr(subregion, NULL, name,
321 iov->iov_len, info->vaddr);
323 dma_as = pci_device_iommu_address_space(o->pci_dev);
325 memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion);
327 trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
330 static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
332 VfuObject *o = vfu_get_private(vfu_ctx);
333 AddressSpace *dma_as = NULL;
334 MemoryRegion *mr = NULL;
335 ram_addr_t offset;
337 mr = memory_region_from_host(info->vaddr, &offset);
338 if (!mr) {
339 return;
342 dma_as = pci_device_iommu_address_space(o->pci_dev);
344 memory_region_del_subregion(dma_as->root, mr);
346 object_unparent((OBJECT(mr)));
348 trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
351 static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
352 hwaddr size, const bool is_write)
354 uint8_t *ptr = buf;
355 bool release_lock = false;
356 uint8_t *ram_ptr = NULL;
357 MemTxResult result;
358 int access_size;
359 uint64_t val;
361 if (memory_access_is_direct(mr, is_write)) {
363 * Some devices expose a PCI expansion ROM, which could be buffer
364 * based as compared to other regions which are primarily based on
365 * MemoryRegionOps. memory_region_find() would already check
366 * for buffer overflow, we don't need to repeat it here.
368 ram_ptr = memory_region_get_ram_ptr(mr);
370 if (is_write) {
371 memcpy((ram_ptr + offset), buf, size);
372 } else {
373 memcpy(buf, (ram_ptr + offset), size);
376 return 0;
379 while (size) {
381 * The read/write logic used below is similar to the ones in
382 * flatview_read/write_continue()
384 release_lock = prepare_mmio_access(mr);
386 access_size = memory_access_size(mr, size, offset);
388 if (is_write) {
389 val = ldn_he_p(ptr, access_size);
391 result = memory_region_dispatch_write(mr, offset, val,
392 size_memop(access_size),
393 MEMTXATTRS_UNSPECIFIED);
394 } else {
395 result = memory_region_dispatch_read(mr, offset, &val,
396 size_memop(access_size),
397 MEMTXATTRS_UNSPECIFIED);
399 stn_he_p(ptr, access_size, val);
402 if (release_lock) {
403 bql_unlock();
404 release_lock = false;
407 if (result != MEMTX_OK) {
408 return -1;
411 size -= access_size;
412 ptr += access_size;
413 offset += access_size;
416 return 0;
419 static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
420 hwaddr bar_offset, char * const buf,
421 hwaddr len, const bool is_write)
423 MemoryRegionSection section = { 0 };
424 uint8_t *ptr = (uint8_t *)buf;
425 MemoryRegion *section_mr = NULL;
426 uint64_t section_size;
427 hwaddr section_offset;
428 hwaddr size = 0;
430 while (len) {
431 section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
432 bar_offset, len);
434 if (!section.mr) {
435 warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
436 return size;
439 section_mr = section.mr;
440 section_offset = section.offset_within_region;
441 section_size = int128_get64(section.size);
443 if (is_write && section_mr->readonly) {
444 warn_report("vfu: attempting to write to readonly region in "
445 "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
446 pci_bar, bar_offset,
447 (bar_offset + section_size));
448 memory_region_unref(section_mr);
449 return size;
452 if (vfu_object_mr_rw(section_mr, ptr, section_offset,
453 section_size, is_write)) {
454 warn_report("vfu: failed to %s "
455 "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
456 is_write ? "write to" : "read from", bar_offset,
457 (bar_offset + section_size), pci_bar);
458 memory_region_unref(section_mr);
459 return size;
462 size += section_size;
463 bar_offset += section_size;
464 ptr += section_size;
465 len -= section_size;
467 memory_region_unref(section_mr);
470 return size;
/**
 * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
 *
 * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
 * define vfu_object_bar2_handler
 *
 * Each generated handler forwards the access to vfu_object_bar_rw() for
 * the PCI device attached to the context.
 */
#define VFU_OBJECT_BAR_HANDLER(BAR_NO)                                         \
    static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx,        \
                                        char * const buf, size_t count,        \
                                        loff_t offset, const bool is_write)    \
    {                                                                          \
        VfuObject *vfu_obj = vfu_get_private(vfu_ctx);                         \
                                                                               \
        return vfu_object_bar_rw(vfu_obj->pci_dev, BAR_NO, offset,             \
                                 buf, count, is_write);                        \
    }
491 VFU_OBJECT_BAR_HANDLER(0)
492 VFU_OBJECT_BAR_HANDLER(1)
493 VFU_OBJECT_BAR_HANDLER(2)
494 VFU_OBJECT_BAR_HANDLER(3)
495 VFU_OBJECT_BAR_HANDLER(4)
496 VFU_OBJECT_BAR_HANDLER(5)
497 VFU_OBJECT_BAR_HANDLER(6)
499 static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
500 &vfu_object_bar0_handler,
501 &vfu_object_bar1_handler,
502 &vfu_object_bar2_handler,
503 &vfu_object_bar3_handler,
504 &vfu_object_bar4_handler,
505 &vfu_object_bar5_handler,
506 &vfu_object_bar6_handler,
510 * vfu_object_register_bars - Identify active BAR regions of pdev and setup
511 * callbacks to handle read/write accesses
513 static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
515 int flags = VFU_REGION_FLAG_RW;
516 int i;
518 for (i = 0; i < PCI_NUM_REGIONS; i++) {
519 if (!pdev->io_regions[i].size) {
520 continue;
523 if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
524 pdev->io_regions[i].memory->readonly) {
525 flags &= ~VFU_REGION_FLAG_WRITE;
528 vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
529 (size_t)pdev->io_regions[i].size,
530 vfu_object_bar_handlers[i],
531 flags, NULL, 0, -1, 0);
533 trace_vfu_bar_register(i, pdev->io_regions[i].addr,
534 pdev->io_regions[i].size);
538 static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
540 int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
541 pci_dev->devfn);
543 return pci_bdf;
546 static void vfu_object_set_irq(void *opaque, int pirq, int level)
548 PCIBus *pci_bus = opaque;
549 PCIDevice *pci_dev = NULL;
550 vfu_ctx_t *vfu_ctx = NULL;
551 int pci_bus_num, devfn;
553 if (level) {
554 pci_bus_num = PCI_BUS_NUM(pirq);
555 devfn = PCI_BDF_TO_DEVFN(pirq);
558 * pci_find_device() performs at O(1) if the device is attached
559 * to the root PCI bus. Whereas, if the device is attached to a
560 * secondary PCI bus (such as when a root port is involved),
561 * finding the parent PCI bus could take O(n)
563 pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);
565 vfu_ctx = pci_dev->irq_opaque;
567 g_assert(vfu_ctx);
569 vfu_irq_trigger(vfu_ctx, 0);
573 static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
574 unsigned int vector)
576 MSIMessage msg;
578 msg.address = 0;
579 msg.data = vector;
581 return msg;
584 static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
586 vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque;
588 vfu_irq_trigger(vfu_ctx, msg.data);
591 static void vfu_object_setup_msi_cbs(VfuObject *o)
593 o->default_msi_trigger = o->pci_dev->msi_trigger;
594 o->default_msi_prepare_message = o->pci_dev->msi_prepare_message;
595 o->default_msix_prepare_message = o->pci_dev->msix_prepare_message;
597 o->pci_dev->msi_trigger = vfu_object_msi_trigger;
598 o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg;
599 o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg;
602 static void vfu_object_restore_msi_cbs(VfuObject *o)
604 o->pci_dev->msi_trigger = o->default_msi_trigger;
605 o->pci_dev->msi_prepare_message = o->default_msi_prepare_message;
606 o->pci_dev->msix_prepare_message = o->default_msix_prepare_message;
609 static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
610 uint32_t count, bool mask)
612 VfuObject *o = vfu_get_private(vfu_ctx);
613 uint32_t vector;
615 for (vector = start; vector < count; vector++) {
616 msix_set_mask(o->pci_dev, vector, mask);
620 static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
621 uint32_t count, bool mask)
623 VfuObject *o = vfu_get_private(vfu_ctx);
624 Error *err = NULL;
625 uint32_t vector;
627 for (vector = start; vector < count; vector++) {
628 msi_set_mask(o->pci_dev, vector, mask, &err);
629 if (err) {
630 VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
631 error_get_pretty(err));
632 error_free(err);
633 err = NULL;
638 static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
640 vfu_ctx_t *vfu_ctx = o->vfu_ctx;
641 int ret;
643 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
644 if (ret < 0) {
645 return ret;
648 if (msix_nr_vectors_allocated(pci_dev)) {
649 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
650 msix_nr_vectors_allocated(pci_dev));
651 vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
652 &vfu_msix_irq_state);
653 } else if (msi_nr_vectors_allocated(pci_dev)) {
654 ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
655 msi_nr_vectors_allocated(pci_dev));
656 vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
657 &vfu_msi_irq_state);
660 if (ret < 0) {
661 return ret;
664 vfu_object_setup_msi_cbs(o);
666 pci_dev->irq_opaque = vfu_ctx;
668 return 0;
671 void vfu_object_set_bus_irq(PCIBus *pci_bus)
673 int bus_num = pci_bus_num(pci_bus);
674 int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);
676 pci_bus_irqs(pci_bus, vfu_object_set_irq, pci_bus, max_bdf);
677 pci_bus_map_irqs(pci_bus, vfu_object_map_irq);
680 static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
682 VfuObject *o = vfu_get_private(vfu_ctx);
684 /* vfu_object_ctx_run() handles lost connection */
685 if (type == VFU_RESET_LOST_CONN) {
686 return 0;
689 device_cold_reset(DEVICE(o->pci_dev));
691 return 0;
695 * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
696 * properties. It also depends on devices instantiated in QEMU. These
697 * dependencies are not available during the instance_init phase of this
698 * object's life-cycle. As such, the server is initialized after the
699 * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
700 * when the machine is setup, and the dependencies are available.
702 static void vfu_object_machine_done(Notifier *notifier, void *data)
704 VfuObject *o = container_of(notifier, VfuObject, machine_done);
705 Error *err = NULL;
707 vfu_object_init_ctx(o, &err);
709 if (err) {
710 error_propagate(&error_abort, err);
715 * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
716 * an unplug blocker for the associated PCI device. Setup a FD handler
717 * to process incoming messages in the context's socket.
719 * The socket and device properties are mandatory, and this function
720 * will not create the context without them - the setters for these
721 * properties should call this function when the property is set. The
722 * machine should also be ready when this function is invoked - it is
723 * because QEMU objects are initialized before devices, and the
724 * associated PCI device wouldn't be available at the object
725 * initialization time. Until these conditions are satisfied, this
726 * function would return early without performing any task.
728 static void vfu_object_init_ctx(VfuObject *o, Error **errp)
730 DeviceState *dev = NULL;
731 vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
732 int ret;
734 if (o->vfu_ctx || !o->socket || !o->device ||
735 !phase_check(PHASE_MACHINE_READY)) {
736 return;
739 if (o->err) {
740 error_propagate(errp, o->err);
741 o->err = NULL;
742 return;
745 o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
746 LIBVFIO_USER_FLAG_ATTACH_NB,
747 o, VFU_DEV_TYPE_PCI);
748 if (o->vfu_ctx == NULL) {
749 error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
750 return;
753 dev = qdev_find_recursive(sysbus_get_default(), o->device);
754 if (dev == NULL) {
755 error_setg(errp, "vfu: Device %s not found", o->device);
756 goto fail;
759 if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
760 error_setg(errp, "vfu: %s not a PCI device", o->device);
761 goto fail;
764 o->pci_dev = PCI_DEVICE(dev);
766 object_ref(OBJECT(o->pci_dev));
768 if (pci_is_express(o->pci_dev)) {
769 pci_type = VFU_PCI_TYPE_EXPRESS;
772 ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
773 if (ret < 0) {
774 error_setg(errp,
775 "vfu: Failed to attach PCI device %s to context - %s",
776 o->device, strerror(errno));
777 goto fail;
780 error_setg(&o->unplug_blocker,
781 "vfu: %s for %s must be deleted before unplugging",
782 TYPE_VFU_OBJECT, o->device);
783 qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
785 ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
786 pci_config_size(o->pci_dev), &vfu_object_cfg_access,
787 VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
788 NULL, 0, -1, 0);
789 if (ret < 0) {
790 error_setg(errp,
791 "vfu: Failed to setup config space handlers for %s- %s",
792 o->device, strerror(errno));
793 goto fail;
796 ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
797 if (ret < 0) {
798 error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
799 o->device);
800 goto fail;
803 vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
805 ret = vfu_object_setup_irqs(o, o->pci_dev);
806 if (ret < 0) {
807 error_setg(errp, "vfu: Failed to setup interrupts for %s",
808 o->device);
809 goto fail;
812 ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
813 if (ret < 0) {
814 error_setg(errp, "vfu: Failed to setup reset callback");
815 goto fail;
818 ret = vfu_realize_ctx(o->vfu_ctx);
819 if (ret < 0) {
820 error_setg(errp, "vfu: Failed to realize device %s- %s",
821 o->device, strerror(errno));
822 goto fail;
825 o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
826 if (o->vfu_poll_fd < 0) {
827 error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
828 goto fail;
831 qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
833 return;
835 fail:
836 vfu_destroy_ctx(o->vfu_ctx);
837 if (o->unplug_blocker && o->pci_dev) {
838 qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
839 error_free(o->unplug_blocker);
840 o->unplug_blocker = NULL;
842 if (o->pci_dev) {
843 vfu_object_restore_msi_cbs(o);
844 o->pci_dev->irq_opaque = NULL;
845 object_unref(OBJECT(o->pci_dev));
846 o->pci_dev = NULL;
848 o->vfu_ctx = NULL;
851 static void vfu_object_init(Object *obj)
853 VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
854 VfuObject *o = VFU_OBJECT(obj);
856 k->nr_devs++;
858 if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
859 error_setg(&o->err, "vfu: %s only compatible with %s machine",
860 TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
861 return;
864 if (!phase_check(PHASE_MACHINE_READY)) {
865 o->machine_done.notify = vfu_object_machine_done;
866 qemu_add_machine_init_done_notifier(&o->machine_done);
869 o->vfu_poll_fd = -1;
872 static void vfu_object_finalize(Object *obj)
874 VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
875 VfuObject *o = VFU_OBJECT(obj);
877 k->nr_devs--;
879 qapi_free_SocketAddress(o->socket);
881 o->socket = NULL;
883 if (o->vfu_poll_fd != -1) {
884 qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
885 o->vfu_poll_fd = -1;
888 if (o->vfu_ctx) {
889 vfu_destroy_ctx(o->vfu_ctx);
890 o->vfu_ctx = NULL;
893 g_free(o->device);
895 o->device = NULL;
897 if (o->unplug_blocker && o->pci_dev) {
898 qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
899 error_free(o->unplug_blocker);
900 o->unplug_blocker = NULL;
903 if (o->pci_dev) {
904 vfu_object_restore_msi_cbs(o);
905 o->pci_dev->irq_opaque = NULL;
906 object_unref(OBJECT(o->pci_dev));
907 o->pci_dev = NULL;
910 if (!k->nr_devs && vfu_object_auto_shutdown()) {
911 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
914 if (o->machine_done.notify) {
915 qemu_remove_machine_init_done_notifier(&o->machine_done);
916 o->machine_done.notify = NULL;
920 static void vfu_object_class_init(ObjectClass *klass, void *data)
922 VfuObjectClass *k = VFU_OBJECT_CLASS(klass);
924 k->nr_devs = 0;
926 object_class_property_add(klass, "socket", "SocketAddress", NULL,
927 vfu_object_set_socket, NULL, NULL);
928 object_class_property_set_description(klass, "socket",
929 "SocketAddress "
930 "(ex: type=unix,path=/tmp/sock). "
931 "Only UNIX is presently supported");
932 object_class_property_add_str(klass, "device", NULL,
933 vfu_object_set_device);
934 object_class_property_set_description(klass, "device",
935 "device ID - only PCI devices "
936 "are presently supported");
939 static const TypeInfo vfu_object_info = {
940 .name = TYPE_VFU_OBJECT,
941 .parent = TYPE_OBJECT,
942 .instance_size = sizeof(VfuObject),
943 .instance_init = vfu_object_init,
944 .instance_finalize = vfu_object_finalize,
945 .class_size = sizeof(VfuObjectClass),
946 .class_init = vfu_object_class_init,
947 .interfaces = (InterfaceInfo[]) {
948 { TYPE_USER_CREATABLE },
/* Registers the x-vfio-user-server type described by vfu_object_info. */
static void vfu_register_types(void)
{
    type_register_static(&vfu_object_info);
}

/* Hooks vfu_register_types() into QEMU's type initialization. */
type_init(vfu_register_types);