Add migration-capability boolean for postcopy-ram.
[qemu/cris-port.git] / migration / migration.c
blobf849f890d91c3506558e3c98c10d3421d266b2d5
1 /*
2 * QEMU live migration
4 * Copyright IBM, Corp. 2008
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
12 * Contributions after 2012-01-13 are licensed under the terms of the
13 * GNU GPL, version 2 or (at your option) any later version.
16 #include "qemu-common.h"
17 #include "qemu/error-report.h"
18 #include "qemu/main-loop.h"
19 #include "migration/migration.h"
20 #include "migration/qemu-file.h"
21 #include "sysemu/sysemu.h"
22 #include "block/block.h"
23 #include "qapi/qmp/qerror.h"
24 #include "qemu/sockets.h"
25 #include "qemu/rcu.h"
26 #include "migration/block.h"
27 #include "qemu/thread.h"
28 #include "qmp-commands.h"
29 #include "trace.h"
30 #include "qapi/util.h"
31 #include "qapi-event.h"
32 #include "qom/cpu.h"
/* Default cap on migration transfer speed (bytes/sec). */
#define MAX_THROTTLE  (32 << 20)

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.*/
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
55 static NotifierList migration_state_notifiers =
56 NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
58 static bool deferred_incoming;
60 /* When we add fault tolerance, we could have several
61 migrations at once. For now we don't need to add
62 dynamic creation of migration */
64 /* For outgoing */
65 MigrationState *migrate_get_current(void)
67 static MigrationState current_migration = {
68 .state = MIGRATION_STATUS_NONE,
69 .bandwidth_limit = MAX_THROTTLE,
70 .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
71 .mbps = -1,
72 .parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] =
73 DEFAULT_MIGRATE_COMPRESS_LEVEL,
74 .parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
75 DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
76 .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
77 DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
78 .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
79 DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL,
80 .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
81 DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT,
84 return &current_migration;
87 /* For incoming */
88 static MigrationIncomingState *mis_current;
90 MigrationIncomingState *migration_incoming_get_current(void)
92 return mis_current;
95 MigrationIncomingState *migration_incoming_state_new(QEMUFile* f)
97 mis_current = g_new0(MigrationIncomingState, 1);
98 mis_current->from_src_file = f;
99 QLIST_INIT(&mis_current->loadvm_handlers);
100 qemu_mutex_init(&mis_current->rp_mutex);
101 qemu_event_init(&mis_current->main_thread_load_event, false);
103 return mis_current;
106 void migration_incoming_state_destroy(void)
108 qemu_event_destroy(&mis_current->main_thread_load_event);
109 loadvm_free_handlers(mis_current);
110 g_free(mis_current);
111 mis_current = NULL;
115 typedef struct {
116 bool optional;
117 uint32_t size;
118 uint8_t runstate[100];
119 RunState state;
120 bool received;
121 } GlobalState;
123 static GlobalState global_state;
125 int global_state_store(void)
127 if (!runstate_store((char *)global_state.runstate,
128 sizeof(global_state.runstate))) {
129 error_report("runstate name too big: %s", global_state.runstate);
130 trace_migrate_state_too_big();
131 return -EINVAL;
133 return 0;
136 void global_state_store_running(void)
138 const char *state = RunState_lookup[RUN_STATE_RUNNING];
139 strncpy((char *)global_state.runstate,
140 state, sizeof(global_state.runstate));
143 static bool global_state_received(void)
145 return global_state.received;
148 static RunState global_state_get_runstate(void)
150 return global_state.state;
153 void global_state_set_optional(void)
155 global_state.optional = true;
158 static bool global_state_needed(void *opaque)
160 GlobalState *s = opaque;
161 char *runstate = (char *)s->runstate;
163 /* If it is not optional, it is mandatory */
165 if (s->optional == false) {
166 return true;
169 /* If state is running or paused, it is not needed */
171 if (strcmp(runstate, "running") == 0 ||
172 strcmp(runstate, "paused") == 0) {
173 return false;
176 /* for any other state it is needed */
177 return true;
180 static int global_state_post_load(void *opaque, int version_id)
182 GlobalState *s = opaque;
183 Error *local_err = NULL;
184 int r;
185 char *runstate = (char *)s->runstate;
187 s->received = true;
188 trace_migrate_global_state_post_load(runstate);
190 r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
191 -1, &local_err);
193 if (r == -1) {
194 if (local_err) {
195 error_report_err(local_err);
197 return -EINVAL;
199 s->state = r;
201 return 0;
204 static void global_state_pre_save(void *opaque)
206 GlobalState *s = opaque;
208 trace_migrate_global_state_pre_save((char *)s->runstate);
209 s->size = strlen((char *)s->runstate) + 1;
212 static const VMStateDescription vmstate_globalstate = {
213 .name = "globalstate",
214 .version_id = 1,
215 .minimum_version_id = 1,
216 .post_load = global_state_post_load,
217 .pre_save = global_state_pre_save,
218 .needed = global_state_needed,
219 .fields = (VMStateField[]) {
220 VMSTATE_UINT32(size, GlobalState),
221 VMSTATE_BUFFER(runstate, GlobalState),
222 VMSTATE_END_OF_LIST()
226 void register_global_state(void)
228 /* We would use it independently that we receive it */
229 strcpy((char *)&global_state.runstate, "");
230 global_state.received = false;
231 vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
234 static void migrate_generate_event(int new_state)
236 if (migrate_use_events()) {
237 qapi_event_send_migration(new_state, &error_abort);
242 * Called on -incoming with a defer: uri.
243 * The migration can be started later after any parameters have been
244 * changed.
246 static void deferred_incoming_migration(Error **errp)
248 if (deferred_incoming) {
249 error_setg(errp, "Incoming migration already deferred");
251 deferred_incoming = true;
254 void qemu_start_incoming_migration(const char *uri, Error **errp)
256 const char *p;
258 qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
259 if (!strcmp(uri, "defer")) {
260 deferred_incoming_migration(errp);
261 } else if (strstart(uri, "tcp:", &p)) {
262 tcp_start_incoming_migration(p, errp);
263 #ifdef CONFIG_RDMA
264 } else if (strstart(uri, "rdma:", &p)) {
265 rdma_start_incoming_migration(p, errp);
266 #endif
267 #if !defined(WIN32)
268 } else if (strstart(uri, "exec:", &p)) {
269 exec_start_incoming_migration(p, errp);
270 } else if (strstart(uri, "unix:", &p)) {
271 unix_start_incoming_migration(p, errp);
272 } else if (strstart(uri, "fd:", &p)) {
273 fd_start_incoming_migration(p, errp);
274 #endif
275 } else {
276 error_setg(errp, "unknown migration protocol: %s", uri);
280 static void process_incoming_migration_co(void *opaque)
282 QEMUFile *f = opaque;
283 Error *local_err = NULL;
284 int ret;
286 migration_incoming_state_new(f);
287 migrate_generate_event(MIGRATION_STATUS_ACTIVE);
288 ret = qemu_loadvm_state(f);
290 qemu_fclose(f);
291 free_xbzrle_decoded_buf();
292 migration_incoming_state_destroy();
294 if (ret < 0) {
295 migrate_generate_event(MIGRATION_STATUS_FAILED);
296 error_report("load of migration failed: %s", strerror(-ret));
297 migrate_decompress_threads_join();
298 exit(EXIT_FAILURE);
301 /* Make sure all file formats flush their mutable metadata */
302 bdrv_invalidate_cache_all(&local_err);
303 if (local_err) {
304 migrate_generate_event(MIGRATION_STATUS_FAILED);
305 error_report_err(local_err);
306 migrate_decompress_threads_join();
307 exit(EXIT_FAILURE);
311 * This must happen after all error conditions are dealt with and
312 * we're sure the VM is going to be running on this host.
314 qemu_announce_self();
316 /* If global state section was not received or we are in running
317 state, we need to obey autostart. Any other state is set with
318 runstate_set. */
320 if (!global_state_received() ||
321 global_state_get_runstate() == RUN_STATE_RUNNING) {
322 if (autostart) {
323 vm_start();
324 } else {
325 runstate_set(RUN_STATE_PAUSED);
327 } else {
328 runstate_set(global_state_get_runstate());
330 migrate_decompress_threads_join();
332 * This must happen after any state changes since as soon as an external
333 * observer sees this event they might start to prod at the VM assuming
334 * it's ready to use.
336 migrate_generate_event(MIGRATION_STATUS_COMPLETED);
339 void process_incoming_migration(QEMUFile *f)
341 Coroutine *co = qemu_coroutine_create(process_incoming_migration_co);
342 int fd = qemu_get_fd(f);
344 assert(fd != -1);
345 migrate_decompress_threads_create();
346 qemu_set_nonblock(fd);
347 qemu_coroutine_enter(co, f);
351 * Send a message on the return channel back to the source
352 * of the migration.
354 void migrate_send_rp_message(MigrationIncomingState *mis,
355 enum mig_rp_message_type message_type,
356 uint16_t len, void *data)
358 trace_migrate_send_rp_message((int)message_type, len);
359 qemu_mutex_lock(&mis->rp_mutex);
360 qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
361 qemu_put_be16(mis->to_src_file, len);
362 qemu_put_buffer(mis->to_src_file, data, len);
363 qemu_fflush(mis->to_src_file);
364 qemu_mutex_unlock(&mis->rp_mutex);
368 * Send a 'SHUT' message on the return channel with the given value
369 * to indicate that we've finished with the RP. Non-0 value indicates
370 * error.
372 void migrate_send_rp_shut(MigrationIncomingState *mis,
373 uint32_t value)
375 uint32_t buf;
377 buf = cpu_to_be32(value);
378 migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
382 * Send a 'PONG' message on the return channel with the given value
383 * (normally in response to a 'PING')
385 void migrate_send_rp_pong(MigrationIncomingState *mis,
386 uint32_t value)
388 uint32_t buf;
390 buf = cpu_to_be32(value);
391 migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
/* amount of nanoseconds we are willing to wait for migration to be down.
 * the choice of nanoseconds is because it is the maximum resolution that
 * get_clock() can achieve. It is an internal measure. All user-visible
 * units must be in seconds */
static uint64_t max_downtime = 300000000;

/* Current maximum tolerated downtime, in nanoseconds. */
uint64_t migrate_max_downtime(void)
{
    return max_downtime;
}
405 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
407 MigrationCapabilityStatusList *head = NULL;
408 MigrationCapabilityStatusList *caps;
409 MigrationState *s = migrate_get_current();
410 int i;
412 caps = NULL; /* silence compiler warning */
413 for (i = 0; i < MIGRATION_CAPABILITY_MAX; i++) {
414 if (head == NULL) {
415 head = g_malloc0(sizeof(*caps));
416 caps = head;
417 } else {
418 caps->next = g_malloc0(sizeof(*caps));
419 caps = caps->next;
421 caps->value =
422 g_malloc(sizeof(*caps->value));
423 caps->value->capability = i;
424 caps->value->state = s->enabled_capabilities[i];
427 return head;
430 MigrationParameters *qmp_query_migrate_parameters(Error **errp)
432 MigrationParameters *params;
433 MigrationState *s = migrate_get_current();
435 params = g_malloc0(sizeof(*params));
436 params->compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
437 params->compress_threads =
438 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
439 params->decompress_threads =
440 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
441 params->x_cpu_throttle_initial =
442 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
443 params->x_cpu_throttle_increment =
444 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
446 return params;
450 * Return true if we're already in the middle of a migration
451 * (i.e. any of the active or setup states)
453 static bool migration_is_setup_or_active(int state)
455 switch (state) {
456 case MIGRATION_STATUS_ACTIVE:
457 case MIGRATION_STATUS_SETUP:
458 return true;
460 default:
461 return false;
466 static void get_xbzrle_cache_stats(MigrationInfo *info)
468 if (migrate_use_xbzrle()) {
469 info->has_xbzrle_cache = true;
470 info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
471 info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
472 info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred();
473 info->xbzrle_cache->pages = xbzrle_mig_pages_transferred();
474 info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss();
475 info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate();
476 info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow();
480 MigrationInfo *qmp_query_migrate(Error **errp)
482 MigrationInfo *info = g_malloc0(sizeof(*info));
483 MigrationState *s = migrate_get_current();
485 switch (s->state) {
486 case MIGRATION_STATUS_NONE:
487 /* no migration has happened ever */
488 break;
489 case MIGRATION_STATUS_SETUP:
490 info->has_status = true;
491 info->has_total_time = false;
492 break;
493 case MIGRATION_STATUS_ACTIVE:
494 case MIGRATION_STATUS_CANCELLING:
495 info->has_status = true;
496 info->has_total_time = true;
497 info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
498 - s->total_time;
499 info->has_expected_downtime = true;
500 info->expected_downtime = s->expected_downtime;
501 info->has_setup_time = true;
502 info->setup_time = s->setup_time;
504 info->has_ram = true;
505 info->ram = g_malloc0(sizeof(*info->ram));
506 info->ram->transferred = ram_bytes_transferred();
507 info->ram->remaining = ram_bytes_remaining();
508 info->ram->total = ram_bytes_total();
509 info->ram->duplicate = dup_mig_pages_transferred();
510 info->ram->skipped = skipped_mig_pages_transferred();
511 info->ram->normal = norm_mig_pages_transferred();
512 info->ram->normal_bytes = norm_mig_bytes_transferred();
513 info->ram->dirty_pages_rate = s->dirty_pages_rate;
514 info->ram->mbps = s->mbps;
515 info->ram->dirty_sync_count = s->dirty_sync_count;
517 if (blk_mig_active()) {
518 info->has_disk = true;
519 info->disk = g_malloc0(sizeof(*info->disk));
520 info->disk->transferred = blk_mig_bytes_transferred();
521 info->disk->remaining = blk_mig_bytes_remaining();
522 info->disk->total = blk_mig_bytes_total();
525 if (cpu_throttle_active()) {
526 info->has_x_cpu_throttle_percentage = true;
527 info->x_cpu_throttle_percentage = cpu_throttle_get_percentage();
530 get_xbzrle_cache_stats(info);
531 break;
532 case MIGRATION_STATUS_COMPLETED:
533 get_xbzrle_cache_stats(info);
535 info->has_status = true;
536 info->has_total_time = true;
537 info->total_time = s->total_time;
538 info->has_downtime = true;
539 info->downtime = s->downtime;
540 info->has_setup_time = true;
541 info->setup_time = s->setup_time;
543 info->has_ram = true;
544 info->ram = g_malloc0(sizeof(*info->ram));
545 info->ram->transferred = ram_bytes_transferred();
546 info->ram->remaining = 0;
547 info->ram->total = ram_bytes_total();
548 info->ram->duplicate = dup_mig_pages_transferred();
549 info->ram->skipped = skipped_mig_pages_transferred();
550 info->ram->normal = norm_mig_pages_transferred();
551 info->ram->normal_bytes = norm_mig_bytes_transferred();
552 info->ram->mbps = s->mbps;
553 info->ram->dirty_sync_count = s->dirty_sync_count;
554 break;
555 case MIGRATION_STATUS_FAILED:
556 info->has_status = true;
557 break;
558 case MIGRATION_STATUS_CANCELLED:
559 info->has_status = true;
560 break;
562 info->status = s->state;
564 return info;
567 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
568 Error **errp)
570 MigrationState *s = migrate_get_current();
571 MigrationCapabilityStatusList *cap;
573 if (migration_is_setup_or_active(s->state)) {
574 error_setg(errp, QERR_MIGRATION_ACTIVE);
575 return;
578 for (cap = params; cap; cap = cap->next) {
579 s->enabled_capabilities[cap->value->capability] = cap->value->state;
582 if (migrate_postcopy_ram()) {
583 if (migrate_use_compression()) {
584 /* The decompression threads asynchronously write into RAM
585 * rather than use the atomic copies needed to avoid
586 * userfaulting. It should be possible to fix the decompression
587 * threads for compatibility in future.
589 error_report("Postcopy is not currently compatible with "
590 "compression");
591 s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM] =
592 false;
597 void qmp_migrate_set_parameters(bool has_compress_level,
598 int64_t compress_level,
599 bool has_compress_threads,
600 int64_t compress_threads,
601 bool has_decompress_threads,
602 int64_t decompress_threads,
603 bool has_x_cpu_throttle_initial,
604 int64_t x_cpu_throttle_initial,
605 bool has_x_cpu_throttle_increment,
606 int64_t x_cpu_throttle_increment, Error **errp)
608 MigrationState *s = migrate_get_current();
610 if (has_compress_level && (compress_level < 0 || compress_level > 9)) {
611 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
612 "is invalid, it should be in the range of 0 to 9");
613 return;
615 if (has_compress_threads &&
616 (compress_threads < 1 || compress_threads > 255)) {
617 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
618 "compress_threads",
619 "is invalid, it should be in the range of 1 to 255");
620 return;
622 if (has_decompress_threads &&
623 (decompress_threads < 1 || decompress_threads > 255)) {
624 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
625 "decompress_threads",
626 "is invalid, it should be in the range of 1 to 255");
627 return;
629 if (has_x_cpu_throttle_initial &&
630 (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) {
631 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
632 "x_cpu_throttle_initial",
633 "an integer in the range of 1 to 99");
635 if (has_x_cpu_throttle_increment &&
636 (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) {
637 error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
638 "x_cpu_throttle_increment",
639 "an integer in the range of 1 to 99");
642 if (has_compress_level) {
643 s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
645 if (has_compress_threads) {
646 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] = compress_threads;
648 if (has_decompress_threads) {
649 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
650 decompress_threads;
652 if (has_x_cpu_throttle_initial) {
653 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
654 x_cpu_throttle_initial;
657 if (has_x_cpu_throttle_increment) {
658 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
659 x_cpu_throttle_increment;
663 /* shared migration helpers */
665 static void migrate_set_state(MigrationState *s, int old_state, int new_state)
667 if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
668 trace_migrate_set_state(new_state);
669 migrate_generate_event(new_state);
673 static void migrate_fd_cleanup(void *opaque)
675 MigrationState *s = opaque;
677 qemu_bh_delete(s->cleanup_bh);
678 s->cleanup_bh = NULL;
680 if (s->file) {
681 trace_migrate_fd_cleanup();
682 qemu_mutex_unlock_iothread();
683 qemu_thread_join(&s->thread);
684 qemu_mutex_lock_iothread();
686 migrate_compress_threads_join();
687 qemu_fclose(s->file);
688 s->file = NULL;
691 assert(s->state != MIGRATION_STATUS_ACTIVE);
693 if (s->state == MIGRATION_STATUS_CANCELLING) {
694 migrate_set_state(s, MIGRATION_STATUS_CANCELLING,
695 MIGRATION_STATUS_CANCELLED);
698 notifier_list_notify(&migration_state_notifiers, s);
701 void migrate_fd_error(MigrationState *s)
703 trace_migrate_fd_error();
704 assert(s->file == NULL);
705 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
706 notifier_list_notify(&migration_state_notifiers, s);
709 static void migrate_fd_cancel(MigrationState *s)
711 int old_state ;
712 QEMUFile *f = migrate_get_current()->file;
713 trace_migrate_fd_cancel();
715 if (s->rp_state.from_dst_file) {
716 /* shutdown the rp socket, so causing the rp thread to shutdown */
717 qemu_file_shutdown(s->rp_state.from_dst_file);
720 do {
721 old_state = s->state;
722 if (!migration_is_setup_or_active(old_state)) {
723 break;
725 migrate_set_state(s, old_state, MIGRATION_STATUS_CANCELLING);
726 } while (s->state != MIGRATION_STATUS_CANCELLING);
729 * If we're unlucky the migration code might be stuck somewhere in a
730 * send/write while the network has failed and is waiting to timeout;
731 * if we've got shutdown(2) available then we can force it to quit.
732 * The outgoing qemu file gets closed in migrate_fd_cleanup that is
733 * called in a bh, so there is no race against this cancel.
735 if (s->state == MIGRATION_STATUS_CANCELLING && f) {
736 qemu_file_shutdown(f);
740 void add_migration_state_change_notifier(Notifier *notify)
742 notifier_list_add(&migration_state_notifiers, notify);
745 void remove_migration_state_change_notifier(Notifier *notify)
747 notifier_remove(notify);
750 bool migration_in_setup(MigrationState *s)
752 return s->state == MIGRATION_STATUS_SETUP;
755 bool migration_has_finished(MigrationState *s)
757 return s->state == MIGRATION_STATUS_COMPLETED;
760 bool migration_has_failed(MigrationState *s)
762 return (s->state == MIGRATION_STATUS_CANCELLED ||
763 s->state == MIGRATION_STATUS_FAILED);
766 MigrationState *migrate_init(const MigrationParams *params)
768 MigrationState *s = migrate_get_current();
769 int64_t bandwidth_limit = s->bandwidth_limit;
770 bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
771 int64_t xbzrle_cache_size = s->xbzrle_cache_size;
772 int compress_level = s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
773 int compress_thread_count =
774 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
775 int decompress_thread_count =
776 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
777 int x_cpu_throttle_initial =
778 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
779 int x_cpu_throttle_increment =
780 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
782 memcpy(enabled_capabilities, s->enabled_capabilities,
783 sizeof(enabled_capabilities));
785 memset(s, 0, sizeof(*s));
786 s->params = *params;
787 memcpy(s->enabled_capabilities, enabled_capabilities,
788 sizeof(enabled_capabilities));
789 s->xbzrle_cache_size = xbzrle_cache_size;
791 s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level;
792 s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS] =
793 compress_thread_count;
794 s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
795 decompress_thread_count;
796 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] =
797 x_cpu_throttle_initial;
798 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] =
799 x_cpu_throttle_increment;
800 s->bandwidth_limit = bandwidth_limit;
801 migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
803 s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
804 return s;
807 static GSList *migration_blockers;
809 void migrate_add_blocker(Error *reason)
811 migration_blockers = g_slist_prepend(migration_blockers, reason);
814 void migrate_del_blocker(Error *reason)
816 migration_blockers = g_slist_remove(migration_blockers, reason);
819 void qmp_migrate_incoming(const char *uri, Error **errp)
821 Error *local_err = NULL;
822 static bool once = true;
824 if (!deferred_incoming) {
825 error_setg(errp, "For use with '-incoming defer'");
826 return;
828 if (!once) {
829 error_setg(errp, "The incoming migration has already been started");
832 qemu_start_incoming_migration(uri, &local_err);
834 if (local_err) {
835 error_propagate(errp, local_err);
836 return;
839 once = false;
842 void qmp_migrate(const char *uri, bool has_blk, bool blk,
843 bool has_inc, bool inc, bool has_detach, bool detach,
844 Error **errp)
846 Error *local_err = NULL;
847 MigrationState *s = migrate_get_current();
848 MigrationParams params;
849 const char *p;
851 params.blk = has_blk && blk;
852 params.shared = has_inc && inc;
854 if (migration_is_setup_or_active(s->state) ||
855 s->state == MIGRATION_STATUS_CANCELLING) {
856 error_setg(errp, QERR_MIGRATION_ACTIVE);
857 return;
859 if (runstate_check(RUN_STATE_INMIGRATE)) {
860 error_setg(errp, "Guest is waiting for an incoming migration");
861 return;
864 if (qemu_savevm_state_blocked(errp)) {
865 return;
868 if (migration_blockers) {
869 *errp = error_copy(migration_blockers->data);
870 return;
873 /* We are starting a new migration, so we want to start in a clean
874 state. This change is only needed if previous migration
875 failed/was cancelled. We don't use migrate_set_state() because
876 we are setting the initial state, not changing it. */
877 s->state = MIGRATION_STATUS_NONE;
879 s = migrate_init(&params);
881 if (strstart(uri, "tcp:", &p)) {
882 tcp_start_outgoing_migration(s, p, &local_err);
883 #ifdef CONFIG_RDMA
884 } else if (strstart(uri, "rdma:", &p)) {
885 rdma_start_outgoing_migration(s, p, &local_err);
886 #endif
887 #if !defined(WIN32)
888 } else if (strstart(uri, "exec:", &p)) {
889 exec_start_outgoing_migration(s, p, &local_err);
890 } else if (strstart(uri, "unix:", &p)) {
891 unix_start_outgoing_migration(s, p, &local_err);
892 } else if (strstart(uri, "fd:", &p)) {
893 fd_start_outgoing_migration(s, p, &local_err);
894 #endif
895 } else {
896 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
897 "a valid migration protocol");
898 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
899 return;
902 if (local_err) {
903 migrate_fd_error(s);
904 error_propagate(errp, local_err);
905 return;
909 void qmp_migrate_cancel(Error **errp)
911 migrate_fd_cancel(migrate_get_current());
914 void qmp_migrate_set_cache_size(int64_t value, Error **errp)
916 MigrationState *s = migrate_get_current();
917 int64_t new_size;
919 /* Check for truncation */
920 if (value != (size_t)value) {
921 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
922 "exceeding address space");
923 return;
926 /* Cache should not be larger than guest ram size */
927 if (value > ram_bytes_total()) {
928 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
929 "exceeds guest ram size ");
930 return;
933 new_size = xbzrle_cache_resize(value);
934 if (new_size < 0) {
935 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
936 "is smaller than page size");
937 return;
940 s->xbzrle_cache_size = new_size;
943 int64_t qmp_query_migrate_cache_size(Error **errp)
945 return migrate_xbzrle_cache_size();
948 void qmp_migrate_set_speed(int64_t value, Error **errp)
950 MigrationState *s;
952 if (value < 0) {
953 value = 0;
955 if (value > SIZE_MAX) {
956 value = SIZE_MAX;
959 s = migrate_get_current();
960 s->bandwidth_limit = value;
961 if (s->file) {
962 qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO);
966 void qmp_migrate_set_downtime(double value, Error **errp)
968 value *= 1e9;
969 value = MAX(0, MIN(UINT64_MAX, value));
970 max_downtime = (uint64_t)value;
973 bool migrate_postcopy_ram(void)
975 MigrationState *s;
977 s = migrate_get_current();
979 return s->enabled_capabilities[MIGRATION_CAPABILITY_X_POSTCOPY_RAM];
982 bool migrate_auto_converge(void)
984 MigrationState *s;
986 s = migrate_get_current();
988 return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
991 bool migrate_zero_blocks(void)
993 MigrationState *s;
995 s = migrate_get_current();
997 return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
1000 bool migrate_use_compression(void)
1002 MigrationState *s;
1004 s = migrate_get_current();
1006 return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
1009 int migrate_compress_level(void)
1011 MigrationState *s;
1013 s = migrate_get_current();
1015 return s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL];
1018 int migrate_compress_threads(void)
1020 MigrationState *s;
1022 s = migrate_get_current();
1024 return s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS];
1027 int migrate_decompress_threads(void)
1029 MigrationState *s;
1031 s = migrate_get_current();
1033 return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
1036 bool migrate_use_events(void)
1038 MigrationState *s;
1040 s = migrate_get_current();
1042 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
1045 int migrate_use_xbzrle(void)
1047 MigrationState *s;
1049 s = migrate_get_current();
1051 return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
1054 int64_t migrate_xbzrle_cache_size(void)
1056 MigrationState *s;
1058 s = migrate_get_current();
1060 return s->xbzrle_cache_size;
1063 /* migration thread support */
1065 * Something bad happened to the RP stream, mark an error
1066 * The caller shall print or trace something to indicate why
1068 static void mark_source_rp_bad(MigrationState *s)
1070 s->rp_state.error = true;
1073 static struct rp_cmd_args {
1074 ssize_t len; /* -1 = variable */
1075 const char *name;
1076 } rp_cmd_args[] = {
1077 [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" },
1078 [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" },
1079 [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" },
1080 [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
1084 * Handles messages sent on the return path towards the source VM
1087 static void *source_return_path_thread(void *opaque)
1089 MigrationState *ms = opaque;
1090 QEMUFile *rp = ms->rp_state.from_dst_file;
1091 uint16_t header_len, header_type;
1092 const int max_len = 512;
1093 uint8_t buf[max_len];
1094 uint32_t tmp32, sibling_error;
1095 int res;
1097 trace_source_return_path_thread_entry();
1098 while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
1099 migration_is_setup_or_active(ms->state)) {
1100 trace_source_return_path_thread_loop_top();
1101 header_type = qemu_get_be16(rp);
1102 header_len = qemu_get_be16(rp);
1104 if (header_type >= MIG_RP_MSG_MAX ||
1105 header_type == MIG_RP_MSG_INVALID) {
1106 error_report("RP: Received invalid message 0x%04x length 0x%04x",
1107 header_type, header_len);
1108 mark_source_rp_bad(ms);
1109 goto out;
1112 if ((rp_cmd_args[header_type].len != -1 &&
1113 header_len != rp_cmd_args[header_type].len) ||
1114 header_len > max_len) {
1115 error_report("RP: Received '%s' message (0x%04x) with"
1116 "incorrect length %d expecting %zu",
1117 rp_cmd_args[header_type].name, header_type, header_len,
1118 (size_t)rp_cmd_args[header_type].len);
1119 mark_source_rp_bad(ms);
1120 goto out;
1123 /* We know we've got a valid header by this point */
1124 res = qemu_get_buffer(rp, buf, header_len);
1125 if (res != header_len) {
1126 error_report("RP: Failed reading data for message 0x%04x"
1127 " read %d expected %d",
1128 header_type, res, header_len);
1129 mark_source_rp_bad(ms);
1130 goto out;
1133 /* OK, we have the message and the data */
1134 switch (header_type) {
1135 case MIG_RP_MSG_SHUT:
1136 sibling_error = be32_to_cpup((uint32_t *)buf);
1137 trace_source_return_path_thread_shut(sibling_error);
1138 if (sibling_error) {
1139 error_report("RP: Sibling indicated error %d", sibling_error);
1140 mark_source_rp_bad(ms);
1143 * We'll let the main thread deal with closing the RP
1144 * we could do a shutdown(2) on it, but we're the only user
1145 * anyway, so there's nothing gained.
1147 goto out;
1149 case MIG_RP_MSG_PONG:
1150 tmp32 = be32_to_cpup((uint32_t *)buf);
1151 trace_source_return_path_thread_pong(tmp32);
1152 break;
1154 default:
1155 break;
1158 if (rp && qemu_file_get_error(rp)) {
1159 trace_source_return_path_thread_bad_end();
1160 mark_source_rp_bad(ms);
1163 trace_source_return_path_thread_end();
1164 out:
1165 ms->rp_state.from_dst_file = NULL;
1166 qemu_fclose(rp);
1167 return NULL;
1170 __attribute__ (( unused )) /* Until later in patch series */
1171 static int open_return_path_on_source(MigrationState *ms)
1174 ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->file);
1175 if (!ms->rp_state.from_dst_file) {
1176 return -1;
1179 trace_open_return_path_on_source();
1180 qemu_thread_create(&ms->rp_state.rp_thread, "return path",
1181 source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
1183 trace_open_return_path_on_source_continue();
1185 return 0;
1188 __attribute__ (( unused )) /* Until later in patch series */
1189 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
1190 static int await_return_path_close_on_source(MigrationState *ms)
1193 * If this is a normal exit then the destination will send a SHUT and the
1194 * rp_thread will exit, however if there's an error we need to cause
1195 * it to exit.
1197 if (qemu_file_get_error(ms->file) && ms->rp_state.from_dst_file) {
1199 * shutdown(2), if we have it, will cause it to unblock if it's stuck
1200 * waiting for the destination.
1202 qemu_file_shutdown(ms->rp_state.from_dst_file);
1203 mark_source_rp_bad(ms);
1205 trace_await_return_path_close_on_source_joining();
1206 qemu_thread_join(&ms->rp_state.rp_thread);
1207 trace_await_return_path_close_on_source_close();
1208 return ms->rp_state.error;
1212 * migration_completion: Used by migration_thread when there's not much left.
1213 * The caller 'breaks' the loop when this returns.
1215 * @s: Current migration state
1216 * @*old_vm_running: Pointer to old_vm_running flag
1217 * @*start_time: Pointer to time to update
1219 static void migration_completion(MigrationState *s, bool *old_vm_running,
1220 int64_t *start_time)
1222 int ret;
1224 qemu_mutex_lock_iothread();
1225 *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1226 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
1227 *old_vm_running = runstate_is_running();
1229 ret = global_state_store();
1230 if (!ret) {
1231 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
1232 if (ret >= 0) {
1233 qemu_file_set_rate_limit(s->file, INT64_MAX);
1234 qemu_savevm_state_complete_precopy(s->file);
1237 qemu_mutex_unlock_iothread();
1239 if (ret < 0) {
1240 goto fail;
1243 if (qemu_file_get_error(s->file)) {
1244 trace_migration_completion_file_err();
1245 goto fail;
1248 migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED);
1249 return;
1251 fail:
1252 migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED);
1256 * Master migration thread on the source VM.
1257 * It drives the migration and pumps the data down the outgoing channel.
1259 static void *migration_thread(void *opaque)
1261 MigrationState *s = opaque;
1262 int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1263 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
1264 int64_t initial_bytes = 0;
1265 int64_t max_size = 0;
1266 int64_t start_time = initial_time;
1267 int64_t end_time;
1268 bool old_vm_running = false;
1270 rcu_register_thread();
1272 qemu_savevm_state_header(s->file);
1273 qemu_savevm_state_begin(s->file, &s->params);
1275 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
1276 migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE);
1278 while (s->state == MIGRATION_STATUS_ACTIVE) {
1279 int64_t current_time;
1280 uint64_t pending_size;
1282 if (!qemu_file_rate_limit(s->file)) {
1283 pending_size = qemu_savevm_state_pending(s->file, max_size);
1284 trace_migrate_pending(pending_size, max_size);
1285 if (pending_size && pending_size >= max_size) {
1286 qemu_savevm_state_iterate(s->file);
1287 } else {
1288 trace_migration_thread_low_pending(pending_size);
1289 migration_completion(s, &old_vm_running, &start_time);
1290 break;
1294 if (qemu_file_get_error(s->file)) {
1295 migrate_set_state(s, MIGRATION_STATUS_ACTIVE,
1296 MIGRATION_STATUS_FAILED);
1297 break;
1299 current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1300 if (current_time >= initial_time + BUFFER_DELAY) {
1301 uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes;
1302 uint64_t time_spent = current_time - initial_time;
1303 double bandwidth = transferred_bytes / time_spent;
1304 max_size = bandwidth * migrate_max_downtime() / 1000000;
1306 s->mbps = time_spent ? (((double) transferred_bytes * 8.0) /
1307 ((double) time_spent / 1000.0)) / 1000.0 / 1000.0 : -1;
1309 trace_migrate_transferred(transferred_bytes, time_spent,
1310 bandwidth, max_size);
1311 /* if we haven't sent anything, we don't want to recalculate
1312 10000 is a small enough number for our purposes */
1313 if (s->dirty_bytes_rate && transferred_bytes > 10000) {
1314 s->expected_downtime = s->dirty_bytes_rate / bandwidth;
1317 qemu_file_reset_rate_limit(s->file);
1318 initial_time = current_time;
1319 initial_bytes = qemu_ftell(s->file);
1321 if (qemu_file_rate_limit(s->file)) {
1322 /* usleep expects microseconds */
1323 g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
1327 /* If we enabled cpu throttling for auto-converge, turn it off. */
1328 cpu_throttle_stop();
1329 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1331 qemu_mutex_lock_iothread();
1332 qemu_savevm_state_cleanup();
1333 if (s->state == MIGRATION_STATUS_COMPLETED) {
1334 uint64_t transferred_bytes = qemu_ftell(s->file);
1335 s->total_time = end_time - s->total_time;
1336 s->downtime = end_time - start_time;
1337 if (s->total_time) {
1338 s->mbps = (((double) transferred_bytes * 8.0) /
1339 ((double) s->total_time)) / 1000;
1341 runstate_set(RUN_STATE_POSTMIGRATE);
1342 } else {
1343 if (old_vm_running) {
1344 vm_start();
1347 qemu_bh_schedule(s->cleanup_bh);
1348 qemu_mutex_unlock_iothread();
1350 rcu_unregister_thread();
1351 return NULL;
1354 void migrate_fd_connect(MigrationState *s)
1356 /* This is a best 1st approximation. ns to ms */
1357 s->expected_downtime = max_downtime/1000000;
1358 s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
1360 qemu_file_set_rate_limit(s->file,
1361 s->bandwidth_limit / XFER_LIMIT_RATIO);
1363 /* Notify before starting migration thread */
1364 notifier_list_notify(&migration_state_notifiers, s);
1366 migrate_compress_threads_create();
1367 qemu_thread_create(&s->thread, "migration", migration_thread, s,
1368 QEMU_THREAD_JOINABLE);