vfio/migration: Move from STOP_COPY to STOP in vfio_save_cleanup()
[qemu/armbru.git] / hw/vfio/migration.c
/*
 * Migration support for VFIO devices
 *
 * Copyright NVIDIA, Inc. 2020
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/error-report.h"
#include <linux/vfio.h>
#include <sys/ioctl.h>

#include "sysemu/runstate.h"
#include "hw/vfio/vfio-common.h"
#include "migration/migration.h"
#include "migration/options.h"
#include "migration/savevm.h"
#include "migration/vmstate.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
#include "migration/blocker.h"
#include "migration/misc.h"
#include "qapi/error.h"
#include "exec/ramlist.h"
#include "exec/ram_addr.h"
#include "pci.h"
#include "trace.h"
#include "hw/hw.h"

/*
 * Flags to be used as unique delimiters for VFIO devices in the migration
 * stream. These flags are composed as:
 * 0xffffffff => MSB 32-bit all 1s
 * 0xef10 => Magic ID, represents emulated (virtual) function IO
 * 0x0000 => 16-bits reserved for flags
 *
 * The beginning of state information is marked by _DEV_CONFIG_STATE,
 * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
 * certain state information is marked by _END_OF_STATE.
 */
#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)
#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)

/*
 * This is an arbitrary size based on migration of mlx5 devices, where typically
 * total device migration size is on the order of 100s of MB. Testing with
 * larger values, e.g. 128MB and 1GB, did not show a performance improvement.
 */
#define VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE (1 * MiB)

static int64_t bytes_transferred;

static const char *mig_state_to_str(enum vfio_device_mig_state state)
{
    switch (state) {
    case VFIO_DEVICE_STATE_ERROR:
        return "ERROR";
    case VFIO_DEVICE_STATE_STOP:
        return "STOP";
    case VFIO_DEVICE_STATE_RUNNING:
        return "RUNNING";
    case VFIO_DEVICE_STATE_STOP_COPY:
        return "STOP_COPY";
    case VFIO_DEVICE_STATE_RESUMING:
        return "RESUMING";
    case VFIO_DEVICE_STATE_PRE_COPY:
        return "PRE_COPY";
    default:
        return "UNKNOWN STATE";
    }
}

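/*
 * Set the device's migration state via the VFIO_DEVICE_FEATURE ioctl. On
 * failure, try to put the device into recover_state; if recover_state is
 * ERROR or cannot be reached either, reset the device. When the new state
 * returns a data transfer FD, it is stored in migration->data_fd.
 */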
static int vfio_migration_set_state(VFIODevice *vbasedev,
                                    enum vfio_device_mig_state new_state,
                                    enum vfio_device_mig_state recover_state)
{
    VFIOMigration *migration = vbasedev->migration;
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                              sizeof(struct vfio_device_feature_mig_state),
                              sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_mig_state *mig_state =
        (struct vfio_device_feature_mig_state *)feature->data;
    int ret;

    feature->argsz = sizeof(buf);
    feature->flags =
        VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE;
    mig_state->device_state = new_state;
    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
        /* Try to set the device in some good state */
        ret = -errno;

        if (recover_state == VFIO_DEVICE_STATE_ERROR) {
            error_report("%s: Failed setting device state to %s, err: %s. "
                         "Recover state is ERROR. Resetting device",
                         vbasedev->name, mig_state_to_str(new_state),
                         strerror(errno));

            goto reset_device;
        }

        error_report(
            "%s: Failed setting device state to %s, err: %s. Setting device in recover state %s",
            vbasedev->name, mig_state_to_str(new_state),
            strerror(errno), mig_state_to_str(recover_state));

        mig_state->device_state = recover_state;
        if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
            ret = -errno;
            error_report(
                "%s: Failed setting device in recover state, err: %s. Resetting device",
                vbasedev->name, strerror(errno));

            goto reset_device;
        }

        migration->device_state = recover_state;

        return ret;
    }

    migration->device_state = new_state;
    if (mig_state->data_fd != -1) {
        if (migration->data_fd != -1) {
            /*
             * This can happen if the device is asynchronously reset and
             * terminates a data transfer.
             */
            error_report("%s: data_fd out of sync", vbasedev->name);
            close(mig_state->data_fd);

            return -EBADF;
        }

        migration->data_fd = mig_state->data_fd;
    }

    trace_vfio_migration_set_state(vbasedev->name, mig_state_to_str(new_state));

    return 0;

reset_device:
    if (ioctl(vbasedev->fd, VFIO_DEVICE_RESET)) {
        hw_error("%s: Failed resetting device, err: %s", vbasedev->name,
                 strerror(errno));
    }

    migration->device_state = VFIO_DEVICE_STATE_RUNNING;

    return ret;
}

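/*
 * Copy data_size bytes of device state from the migration stream straight
 * into the device's migration data FD.
 */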
static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
                            uint64_t data_size)
{
    VFIOMigration *migration = vbasedev->migration;
    int ret;

    ret = qemu_file_get_to_fd(f, migration->data_fd, data_size);
    trace_vfio_load_state_device_data(vbasedev->name, data_size, ret);

    return ret;
}

static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;

    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);

    if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
        vbasedev->ops->vfio_save_config(vbasedev, f);
    }

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);

    trace_vfio_save_device_config_state(vbasedev->name);

    return qemu_file_get_error(f);
}

static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    uint64_t data;

    if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
        int ret;

        ret = vbasedev->ops->vfio_load_config(vbasedev, f);
        if (ret) {
            error_report("%s: Failed to load device config space",
                         vbasedev->name);
            return ret;
        }
    }

    data = qemu_get_be64(f);
    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
        error_report("%s: Failed loading device config space, "
                     "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
        return -EINVAL;
    }

    trace_vfio_load_device_config_state(vbasedev->name);
    return qemu_file_get_error(f);
}

static void vfio_migration_cleanup(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;

    close(migration->data_fd);
    migration->data_fd = -1;
}

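/*
 * Query the expected amount of STOP_COPY device state via the
 * VFIO_DEVICE_FEATURE_MIG_DATA_SIZE feature.
 */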
static int vfio_query_stop_copy_size(VFIODevice *vbasedev,
                                     uint64_t *stop_copy_size)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                              sizeof(struct vfio_device_feature_mig_data_size),
                              sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_mig_data_size *mig_data_size =
        (struct vfio_device_feature_mig_data_size *)feature->data;

    feature->argsz = sizeof(buf);
    feature->flags =
        VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIG_DATA_SIZE;

    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
        return -errno;
    }

    *stop_copy_size = mig_data_size->stop_copy_length;

    return 0;
}

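/*
 * Refresh the pre-copy initial/dirty size estimates from the data FD using
 * the VFIO_MIG_GET_PRECOPY_INFO ioctl.
 */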
static int vfio_query_precopy_size(VFIOMigration *migration)
{
    struct vfio_precopy_info precopy = {
        .argsz = sizeof(precopy),
    };

    migration->precopy_init_size = 0;
    migration->precopy_dirty_size = 0;

    if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) {
        return -errno;
    }

    migration->precopy_init_size = precopy.initial_bytes;
    migration->precopy_dirty_size = precopy.dirty_bytes;

    return 0;
}

/* Returns the size of saved data on success and -errno on error */
static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration)
{
    ssize_t data_size;

    data_size = read(migration->data_fd, migration->data_buffer,
                     migration->data_buffer_size);
    if (data_size < 0) {
        /*
         * Pre-copy emptied all the device state for now. For more information,
         * please refer to the Linux kernel VFIO uAPI.
         */
        if (errno == ENOMSG) {
            return 0;
        }

        return -errno;
    }
    if (data_size == 0) {
        return 0;
    }

    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
    qemu_put_be64(f, data_size);
    qemu_put_buffer(f, migration->data_buffer, data_size);
    bytes_transferred += data_size;

    trace_vfio_save_block(migration->vbasedev->name, data_size);

    return qemu_file_get_error(f) ?: data_size;
}

static void vfio_update_estimated_pending_data(VFIOMigration *migration,
                                               uint64_t data_size)
{
    if (!data_size) {
        /*
         * Pre-copy emptied all the device state for now, update estimated sizes
         * accordingly.
         */
        migration->precopy_init_size = 0;
        migration->precopy_dirty_size = 0;

        return;
    }

    if (migration->precopy_init_size) {
        uint64_t init_size = MIN(migration->precopy_init_size, data_size);

        migration->precopy_init_size -= init_size;
        data_size -= init_size;
    }

    migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size,
                                         data_size);
}

static bool vfio_precopy_supported(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;

    return migration->mig_flags & VFIO_MIGRATION_PRE_COPY;
}

/* ---------------------------------------------------------------------- */

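/*
 * Allocate the migration data buffer (capped at
 * VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE) and, when pre-copy is supported, move
 * the device to PRE_COPY so iterative saving can start.
 */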
static int vfio_save_setup(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;

    qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);

    vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
    migration->data_buffer_size = MIN(VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE,
                                      stop_copy_size);
    migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
    if (!migration->data_buffer) {
        error_report("%s: Failed to allocate migration data buffer",
                     vbasedev->name);
        return -ENOMEM;
    }

    if (vfio_precopy_supported(vbasedev)) {
        int ret;

        switch (migration->device_state) {
        case VFIO_DEVICE_STATE_RUNNING:
            ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY,
                                           VFIO_DEVICE_STATE_RUNNING);
            if (ret) {
                return ret;
            }

            vfio_query_precopy_size(migration);

            break;
        case VFIO_DEVICE_STATE_STOP:
            /* vfio_save_complete_precopy() will go to STOP_COPY */
            break;
        default:
            return -EINVAL;
        }
    }

    trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size);

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);

    return qemu_file_get_error(f);
}

static void vfio_save_cleanup(void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;

    /*
     * Changing device state from STOP_COPY to STOP can take time. Do it here,
     * after migration has completed, so it won't increase downtime.
     */
    if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
        /*
         * If setting the device in STOP state fails, the device should be
         * reset. To do so, use ERROR state as a recover state.
         */
        vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
                                 VFIO_DEVICE_STATE_ERROR);
    }

    g_free(migration->data_buffer);
    migration->data_buffer = NULL;
    migration->precopy_init_size = 0;
    migration->precopy_dirty_size = 0;
    migration->initial_data_sent = false;
    vfio_migration_cleanup(vbasedev);
    trace_vfio_save_cleanup(vbasedev->name);
}

static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy,
                                        uint64_t *can_postcopy)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;

    if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) {
        return;
    }

    *must_precopy +=
        migration->precopy_init_size + migration->precopy_dirty_size;

    trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy,
                                      *can_postcopy,
                                      migration->precopy_init_size,
                                      migration->precopy_dirty_size);
}

/*
 * Migration size of VFIO devices can be as little as a few KBs or as big as
 * many GBs. This value should be big enough to cover the worst case.
 */
#define VFIO_MIG_STOP_COPY_SIZE (100 * GiB)

static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy,
                                     uint64_t *can_postcopy)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE;

    /*
     * If getting pending migration size fails, VFIO_MIG_STOP_COPY_SIZE is
     * reported so downtime limit won't be violated.
     */
    vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
    *must_precopy += stop_copy_size;

    if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) {
        vfio_query_precopy_size(migration);

        *must_precopy +=
            migration->precopy_init_size + migration->precopy_dirty_size;
    }

    trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy,
                                   stop_copy_size, migration->precopy_init_size,
                                   migration->precopy_dirty_size);
}

static bool vfio_is_active_iterate(void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;

    return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY;
}

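/*
 * Save one block of pre-copy device state per call. Once the initial
 * pre-copy data has been fully sent and switchover-ack is in use, emit
 * VFIO_MIG_FLAG_DEV_INIT_DATA_SENT so the destination can approve the
 * switchover.
 */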
static int vfio_save_iterate(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    ssize_t data_size;

    data_size = vfio_save_block(f, migration);
    if (data_size < 0) {
        return data_size;
    }

    vfio_update_estimated_pending_data(migration, data_size);

    if (migrate_switchover_ack() && !migration->precopy_init_size &&
        !migration->initial_data_sent) {
        qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT);
        migration->initial_data_sent = true;
    } else {
        qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    }

    trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size,
                            migration->precopy_dirty_size);

    /*
     * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero.
     * Return 1 so following handlers will not be potentially blocked.
     */
    return 1;
}

static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    ssize_t data_size;
    int ret;

    /* We reach here with device state STOP or STOP_COPY only */
    ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
                                   VFIO_DEVICE_STATE_STOP);
    if (ret) {
        return ret;
    }

    do {
        data_size = vfio_save_block(f, vbasedev->migration);
        if (data_size < 0) {
            return data_size;
        }
    } while (data_size);

    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    ret = qemu_file_get_error(f);
    if (ret) {
        return ret;
    }

    trace_vfio_save_complete_precopy(vbasedev->name, ret);

    return ret;
}

static void vfio_save_state(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;
    int ret;

    ret = vfio_save_device_config_state(f, opaque);
    if (ret) {
        error_report("%s: Failed to save device config space",
                     vbasedev->name);
        qemu_file_set_error(f, ret);
    }
}

static int vfio_load_setup(QEMUFile *f, void *opaque)
{
    VFIODevice *vbasedev = opaque;

    return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
                                    vbasedev->migration->device_state);
}

static int vfio_load_cleanup(void *opaque)
{
    VFIODevice *vbasedev = opaque;

    vfio_migration_cleanup(vbasedev);
    trace_vfio_load_cleanup(vbasedev->name);

    return 0;
}

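/*
 * Parse the flag-delimited VFIO migration stream: config state, setup state
 * and device data chunks, each terminated by VFIO_MIG_FLAG_END_OF_STATE.
 * DEV_INIT_DATA_SENT triggers the switchover-ack approval on the destination.
 */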
static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
{
    VFIODevice *vbasedev = opaque;
    int ret = 0;
    uint64_t data;

    data = qemu_get_be64(f);
    while (data != VFIO_MIG_FLAG_END_OF_STATE) {

        trace_vfio_load_state(vbasedev->name, data);

        switch (data) {
        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
        {
            return vfio_load_device_config_state(f, opaque);
        }
        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
        {
            data = qemu_get_be64(f);
            if (data == VFIO_MIG_FLAG_END_OF_STATE) {
                return ret;
            } else {
                error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
                             vbasedev->name, data);
                return -EINVAL;
            }
            break;
        }
        case VFIO_MIG_FLAG_DEV_DATA_STATE:
        {
            uint64_t data_size = qemu_get_be64(f);

            if (data_size) {
                ret = vfio_load_buffer(f, vbasedev, data_size);
                if (ret < 0) {
                    return ret;
                }
            }
            break;
        }
        case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT:
        {
            if (!vfio_precopy_supported(vbasedev) ||
                !migrate_switchover_ack()) {
                error_report("%s: Received INIT_DATA_SENT but switchover ack "
                             "is not used", vbasedev->name);
                return -EINVAL;
            }

            ret = qemu_loadvm_approve_switchover();
            if (ret) {
                error_report(
                    "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)",
                    vbasedev->name, ret, strerror(-ret));
            }

            return ret;
        }
        default:
            error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
            return -EINVAL;
        }

        data = qemu_get_be64(f);
        ret = qemu_file_get_error(f);
        if (ret) {
            return ret;
        }
    }

    return ret;
}

static bool vfio_switchover_ack_needed(void *opaque)
{
    VFIODevice *vbasedev = opaque;

    return vfio_precopy_supported(vbasedev);
}

static const SaveVMHandlers savevm_vfio_handlers = {
    .save_setup = vfio_save_setup,
    .save_cleanup = vfio_save_cleanup,
    .state_pending_estimate = vfio_state_pending_estimate,
    .state_pending_exact = vfio_state_pending_exact,
    .is_active_iterate = vfio_is_active_iterate,
    .save_live_iterate = vfio_save_iterate,
    .save_live_complete_precopy = vfio_save_complete_precopy,
    .save_state = vfio_save_state,
    .load_setup = vfio_load_setup,
    .load_cleanup = vfio_load_cleanup,
    .load_state = vfio_load_state,
    .switchover_ack_needed = vfio_switchover_ack_needed,
};

/* ---------------------------------------------------------------------- */

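/*
 * Track VM run state changes: move the device to RUNNING when the VM starts,
 * to STOP_COPY when the VM stops during pre-copy (finish-migrate or paused),
 * and to STOP otherwise.
 */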
static void vfio_vmstate_change(void *opaque, bool running, RunState state)
{
    VFIODevice *vbasedev = opaque;
    VFIOMigration *migration = vbasedev->migration;
    enum vfio_device_mig_state new_state;
    int ret;

    if (running) {
        new_state = VFIO_DEVICE_STATE_RUNNING;
    } else {
        new_state =
            (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY &&
             (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ?
                VFIO_DEVICE_STATE_STOP_COPY :
                VFIO_DEVICE_STATE_STOP;
    }

    /*
     * If setting the device in new_state fails, the device should be reset.
     * To do so, use ERROR state as a recover state.
     */
    ret = vfio_migration_set_state(vbasedev, new_state,
                                   VFIO_DEVICE_STATE_ERROR);
    if (ret) {
        /*
         * Migration should be aborted in this case, but vm_state_notify()
         * currently does not support reporting failures.
         */
        if (migrate_get_current()->to_dst_file) {
            qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        }
    }

    trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
                              mig_state_to_str(new_state));
}

static void vfio_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VFIOMigration *migration = container_of(notifier, VFIOMigration,
                                            migration_state);
    VFIODevice *vbasedev = migration->vbasedev;

    trace_vfio_migration_state_notifier(vbasedev->name,
                                        MigrationStatus_str(s->state));

    switch (s->state) {
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_FAILED:
        /*
         * If setting the device in RUNNING state fails, the device should
         * be reset. To do so, use ERROR state as a recover state.
         */
        vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING,
                                 VFIO_DEVICE_STATE_ERROR);
    }
}

static void vfio_migration_free(VFIODevice *vbasedev)
{
    g_free(vbasedev->migration);
    vbasedev->migration = NULL;
}

static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
                              sizeof(struct vfio_device_feature_migration),
                              sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
    struct vfio_device_feature_migration *mig =
        (struct vfio_device_feature_migration *)feature->data;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;
    if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) {
        return -errno;
    }

    *mig_flags = mig->flags;

    return 0;
}

static bool vfio_dma_logging_supported(VFIODevice *vbasedev)
{
    uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
                              sizeof(uint64_t))] = {};
    struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;

    feature->argsz = sizeof(buf);
    feature->flags = VFIO_DEVICE_FEATURE_PROBE |
                     VFIO_DEVICE_FEATURE_DMA_LOGGING_START;

    return !ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature);
}

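/*
 * Probe the device's migration capabilities, set up the VFIOMigration state
 * and register the savevm handlers plus the VM state and migration state
 * change notifiers.
 */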
static int vfio_migration_init(VFIODevice *vbasedev)
{
    int ret;
    Object *obj;
    VFIOMigration *migration;
    char id[256] = "";
    g_autofree char *path = NULL, *oid = NULL;
    uint64_t mig_flags = 0;

    if (!vbasedev->ops->vfio_get_object) {
        return -EINVAL;
    }

    obj = vbasedev->ops->vfio_get_object(vbasedev);
    if (!obj) {
        return -EINVAL;
    }

    ret = vfio_migration_query_flags(vbasedev, &mig_flags);
    if (ret) {
        return ret;
    }

    /* Basic migration functionality must be supported */
    if (!(mig_flags & VFIO_MIGRATION_STOP_COPY)) {
        return -EOPNOTSUPP;
    }

    vbasedev->migration = g_new0(VFIOMigration, 1);
    migration = vbasedev->migration;
    migration->vbasedev = vbasedev;
    migration->device_state = VFIO_DEVICE_STATE_RUNNING;
    migration->data_fd = -1;
    migration->mig_flags = mig_flags;

    vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev);

    oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
    if (oid) {
        path = g_strdup_printf("%s/vfio", oid);
    } else {
        path = g_strdup("vfio");
    }
    strpadcpy(id, sizeof(id), path, '\0');

    register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
                         vbasedev);

    migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
                                                           vfio_vmstate_change,
                                                           vbasedev);
    migration->migration_state.notify = vfio_migration_state_notifier;
    add_migration_state_change_notifier(&migration->migration_state);

    return 0;
}

static void vfio_migration_deinit(VFIODevice *vbasedev)
{
    VFIOMigration *migration = vbasedev->migration;

    remove_migration_state_change_notifier(&migration->migration_state);
    qemu_del_vm_change_state_handler(migration->vm_state);
    unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
    vfio_migration_free(vbasedev);
    vfio_unblock_multiple_devices_migration();
}

static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp)
{
    int ret;

    if (vbasedev->enable_migration == ON_OFF_AUTO_ON) {
        error_propagate(errp, err);
        return -EINVAL;
    }

    vbasedev->migration_blocker = error_copy(err);
    error_free(err);

    ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
    if (ret < 0) {
        error_free(vbasedev->migration_blocker);
        vbasedev->migration_blocker = NULL;
    }

    return ret;
}

/* ---------------------------------------------------------------------- */

int64_t vfio_mig_bytes_transferred(void)
{
    return bytes_transferred;
}

void vfio_reset_bytes_transferred(void)
{
    bytes_transferred = 0;
}

/*
 * Return true when either migration is initialized or a blocker is
 * registered. Currently we only return false when adding the blocker fails,
 * which will de-register the VFIO device.
 */
bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp)
{
    Error *err = NULL;
    int ret;

    if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) {
        error_setg(&err, "%s: Migration is disabled for VFIO device",
                   vbasedev->name);
        return !vfio_block_migration(vbasedev, err, errp);
    }

    ret = vfio_migration_init(vbasedev);
    if (ret) {
        if (ret == -ENOTTY) {
            error_setg(&err, "%s: VFIO migration is not supported in kernel",
                       vbasedev->name);
        } else {
            error_setg(&err,
                       "%s: Migration couldn't be initialized for VFIO device, "
                       "err: %d (%s)",
                       vbasedev->name, ret, strerror(-ret));
        }

        return !vfio_block_migration(vbasedev, err, errp);
    }

    if (!vbasedev->dirty_pages_supported) {
        if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) {
            error_setg(&err,
                       "%s: VFIO device doesn't support device dirty tracking",
                       vbasedev->name);
            goto add_blocker;
        }

        warn_report("%s: VFIO device doesn't support device dirty tracking",
                    vbasedev->name);
    }

    ret = vfio_block_multiple_devices_migration(vbasedev, errp);
    if (ret) {
        goto out_deinit;
    }

    if (vfio_viommu_preset(vbasedev)) {
        error_setg(&err, "%s: Migration is currently not supported "
                   "with vIOMMU enabled", vbasedev->name);
        goto add_blocker;
    }

    trace_vfio_migration_realize(vbasedev->name);
    return true;

add_blocker:
    ret = vfio_block_migration(vbasedev, err, errp);
out_deinit:
    if (ret) {
        vfio_migration_deinit(vbasedev);
    }
    return !ret;
}

void vfio_migration_exit(VFIODevice *vbasedev)
{
    if (vbasedev->migration) {
        vfio_migration_deinit(vbasedev);
    }

    if (vbasedev->migration_blocker) {
        migrate_del_blocker(vbasedev->migration_blocker);
        error_free(vbasedev->migration_blocker);
        vbasedev->migration_blocker = NULL;
    }
}