migration/colo.c

   1 /*
   2  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
   3  * (a.k.a. Fault Tolerance or Continuous Replication)
   4  *
   5  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
   6  * Copyright (c) 2016 FUJITSU LIMITED
   7  * Copyright (c) 2016 Intel Corporation
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2 or
  10  * later.  See the COPYING file in the top-level directory.
  11  */
  12
  13 #include "qemu/osdep.h"
  14 #include "sysemu/sysemu.h"
  15 #include "qapi/error.h"
  16 #include "qapi/qapi-commands-migration.h"
  17 #include "qemu-file-channel.h"
  18 #include "migration.h"
  19 #include "qemu-file.h"
  20 #include "savevm.h"
  21 #include "migration/colo.h"
  22 #include "block.h"
  23 #include "io/channel-buffer.h"
  24 #include "trace.h"
  25 #include "qemu/error-report.h"
  26 #include "migration/failover.h"
  27 #ifdef CONFIG_REPLICATION
  28 #include "replication.h"
  29 #endif
  30 #include "net/colo-compare.h"
  31 #include "net/colo.h"
  32 #include "block/block.h"
  33 #include "qapi/qapi-events-migration.h"
  34 #include "qapi/qmp/qerror.h"
  35 #include "sysemu/cpus.h"
  36 #include "net/filter.h"
  37
  38 static bool vmstate_loading;
  39 static Notifier packets_compare_notifier;
  40
  41 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
  42
  43 bool migration_in_colo_state(void)
  44 {
  45     MigrationState *s = migrate_get_current();
  46
  47     return (s->state == MIGRATION_STATUS_COLO);
  48 }
  49
  50 bool migration_incoming_in_colo_state(void)
  51 {
  52     MigrationIncomingState *mis = migration_incoming_get_current();
  53
  54     return mis && (mis->state == MIGRATION_STATUS_COLO);
  55 }
  56
  57 static bool colo_runstate_is_stopped(void)
  58 {
  59     return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
  60 }
  61
  62 static void secondary_vm_do_failover(void)
  63 {
  64 /* COLO needs enable block-replication */
  65 #ifdef CONFIG_REPLICATION
  66     int old_state;
  67     MigrationIncomingState *mis = migration_incoming_get_current();
  68     Error *local_err = NULL;
  69
  70     /* Can not do failover during the process of VM's loading VMstate, Or
  71      * it will break the secondary VM.
  72      */
  73     if (vmstate_loading) {
  74         old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
  75                         FAILOVER_STATUS_RELAUNCH);
  76         if (old_state != FAILOVER_STATUS_ACTIVE) {
  77             error_report("Unknown error while do failover for secondary VM,"
  78                          "old_state: %s", FailoverStatus_str(old_state));
  79         }
  80         return;
  81     }
  82
  83     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
  84                       MIGRATION_STATUS_COMPLETED);
  85
  86     replication_stop_all(true, &local_err);
  87     if (local_err) {
  88         error_report_err(local_err);
  89     }
  90
  91     /* Notify all filters of all NIC to do checkpoint */
  92     colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
  93     if (local_err) {
  94         error_report_err(local_err);
  95     }
  96
  97     if (!autostart) {
  98         error_report("\"-S\" qemu option will be ignored in secondary side");
  99         /* recover runstate to normal migration finish state */
 100         autostart = true;
 101     }
 102     /*
 103      * Make sure COLO incoming thread not block in recv or send,
 104      * If mis->from_src_file and mis->to_src_file use the same fd,
 105      * The second shutdown() will return -1, we ignore this value,
 106      * It is harmless.
 107      */
 108     if (mis->from_src_file) {
 109         qemu_file_shutdown(mis->from_src_file);
 110     }
 111     if (mis->to_src_file) {
 112         qemu_file_shutdown(mis->to_src_file);
 113     }
 114
 115     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
 116                                    FAILOVER_STATUS_COMPLETED);
 117     if (old_state != FAILOVER_STATUS_ACTIVE) {
 118         error_report("Incorrect state (%s) while doing failover for "
 119                      "secondary VM", FailoverStatus_str(old_state));
 120         return;
 121     }
 122     /* Notify COLO incoming thread that failover work is finished */
 123     qemu_sem_post(&mis->colo_incoming_sem);
 124     /* For Secondary VM, jump to incoming co */
 125     if (mis->migration_incoming_co) {
 126         qemu_coroutine_enter(mis->migration_incoming_co);
 127     }
 128 #else
 129     abort();
 130 #endif
 131 }
 132
 133 static void primary_vm_do_failover(void)
 134 {
 135 #ifdef CONFIG_REPLICATION
 136     MigrationState *s = migrate_get_current();
 137     int old_state;
 138     Error *local_err = NULL;
 139
 140     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
 141                       MIGRATION_STATUS_COMPLETED);
 142     /*
 143      * kick COLO thread which might wait at
 144      * qemu_sem_wait(&s->colo_checkpoint_sem).
 145      */
 146     colo_checkpoint_notify(migrate_get_current());
 147
 148     /*
 149      * Wake up COLO thread which may blocked in recv() or send(),
 150      * The s->rp_state.from_dst_file and s->to_dst_file may use the
 151      * same fd, but we still shutdown the fd for twice, it is harmless.
 152      */
 153     if (s->to_dst_file) {
 154         qemu_file_shutdown(s->to_dst_file);
 155     }
 156     if (s->rp_state.from_dst_file) {
 157         qemu_file_shutdown(s->rp_state.from_dst_file);
 158     }
 159
 160     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
 161                                    FAILOVER_STATUS_COMPLETED);
 162     if (old_state != FAILOVER_STATUS_ACTIVE) {
 163         error_report("Incorrect state (%s) while doing failover for Primary VM",
 164                      FailoverStatus_str(old_state));
 165         return;
 166     }
 167
 168     replication_stop_all(true, &local_err);
 169     if (local_err) {
 170         error_report_err(local_err);
 171         local_err = NULL;
 172     }
 173
 174     /* Notify COLO thread that failover work is finished */
 175     qemu_sem_post(&s->colo_exit_sem);
 176 #else
 177     abort();
 178 #endif
 179 }
 180
 181 COLOMode get_colo_mode(void)
 182 {
 183     if (migration_in_colo_state()) {
 184         return COLO_MODE_PRIMARY;
 185     } else if (migration_incoming_in_colo_state()) {
 186         return COLO_MODE_SECONDARY;
 187     } else {
 188         return COLO_MODE_NONE;
 189     }
 190 }
 191
 192 void colo_do_failover(MigrationState *s)
 193 {
 194     /* Make sure VM stopped while failover happened. */
 195     if (!colo_runstate_is_stopped()) {
 196         vm_stop_force_state(RUN_STATE_COLO);
 197     }
 198
 199     if (get_colo_mode() == COLO_MODE_PRIMARY) {
 200         primary_vm_do_failover();
 201     } else {
 202         secondary_vm_do_failover();
 203     }
 204 }
 205
 206 #ifdef CONFIG_REPLICATION
 207 void qmp_xen_set_replication(bool enable, bool primary,
 208                              bool has_failover, bool failover,
 209                              Error **errp)
 210 {
 211     ReplicationMode mode = primary ?
 212                            REPLICATION_MODE_PRIMARY :
 213                            REPLICATION_MODE_SECONDARY;
 214
 215     if (has_failover && enable) {
 216         error_setg(errp, "Parameter 'failover' is only for"
 217                    " stopping replication");
 218         return;
 219     }
 220
 221     if (enable) {
 222         replication_start_all(mode, errp);
 223     } else {
 224         if (!has_failover) {
 225             failover = NULL;
 226         }
 227         replication_stop_all(failover, failover ? NULL : errp);
 228     }
 229 }
 230
 231 ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
 232 {
 233     Error *err = NULL;
 234     ReplicationStatus *s = g_new0(ReplicationStatus, 1);
 235
 236     replication_get_error_all(&err);
 237     if (err) {
 238         s->error = true;
 239         s->has_desc = true;
 240         s->desc = g_strdup(error_get_pretty(err));
 241     } else {
 242         s->error = false;
 243     }
 244
 245     error_free(err);
 246     return s;
 247 }
 248
 249 void qmp_xen_colo_do_checkpoint(Error **errp)
 250 {
 251     replication_do_checkpoint_all(errp);
 252 }
 253 #endif
 254
 255 COLOStatus *qmp_query_colo_status(Error **errp)
 256 {
 257     COLOStatus *s = g_new0(COLOStatus, 1);
 258
 259     s->mode = get_colo_mode();
 260
 261     switch (failover_get_state()) {
 262     case FAILOVER_STATUS_NONE:
 263         s->reason = COLO_EXIT_REASON_NONE;
 264         break;
 265     case FAILOVER_STATUS_REQUIRE:
 266         s->reason = COLO_EXIT_REASON_REQUEST;
 267         break;
 268     default:
 269         s->reason = COLO_EXIT_REASON_ERROR;
 270     }
 271
 272     return s;
 273 }
 274
 275 static void colo_send_message(QEMUFile *f, COLOMessage msg,
 276                               Error **errp)
 277 {
 278     int ret;
 279
 280     if (msg >= COLO_MESSAGE__MAX) {
 281         error_setg(errp, "%s: Invalid message", __func__);
 282         return;
 283     }
 284     qemu_put_be32(f, msg);
 285     qemu_fflush(f);
 286
 287     ret = qemu_file_get_error(f);
 288     if (ret < 0) {
 289         error_setg_errno(errp, -ret, "Can't send COLO message");
 290     }
 291     trace_colo_send_message(COLOMessage_str(msg));
 292 }
 293
 294 static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
 295                                     uint64_t value, Error **errp)
 296 {
 297     Error *local_err = NULL;
 298     int ret;
 299
 300     colo_send_message(f, msg, &local_err);
 301     if (local_err) {
 302         error_propagate(errp, local_err);
 303         return;
 304     }
 305     qemu_put_be64(f, value);
 306     qemu_fflush(f);
 307
 308     ret = qemu_file_get_error(f);
 309     if (ret < 0) {
 310         error_setg_errno(errp, -ret, "Failed to send value for message:%s",
 311                          COLOMessage_str(msg));
 312     }
 313 }
 314
 315 static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
 316 {
 317     COLOMessage msg;
 318     int ret;
 319
 320     msg = qemu_get_be32(f);
 321     ret = qemu_file_get_error(f);
 322     if (ret < 0) {
 323         error_setg_errno(errp, -ret, "Can't receive COLO message");
 324         return msg;
 325     }
 326     if (msg >= COLO_MESSAGE__MAX) {
 327         error_setg(errp, "%s: Invalid message", __func__);
 328         return msg;
 329     }
 330     trace_colo_receive_message(COLOMessage_str(msg));
 331     return msg;
 332 }
 333
 334 static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
 335                                        Error **errp)
 336 {
 337     COLOMessage msg;
 338     Error *local_err = NULL;
 339
 340     msg = colo_receive_message(f, &local_err);
 341     if (local_err) {
 342         error_propagate(errp, local_err);
 343         return;
 344     }
 345     if (msg != expect_msg) {
 346         error_setg(errp, "Unexpected COLO message %d, expected %d",
 347                           msg, expect_msg);
 348     }
 349 }
 350
 351 static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
 352                                            Error **errp)
 353 {
 354     Error *local_err = NULL;
 355     uint64_t value;
 356     int ret;
 357
 358     colo_receive_check_message(f, expect_msg, &local_err);
 359     if (local_err) {
 360         error_propagate(errp, local_err);
 361         return 0;
 362     }
 363
 364     value = qemu_get_be64(f);
 365     ret = qemu_file_get_error(f);
 366     if (ret < 0) {
 367         error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
 368                          COLOMessage_str(expect_msg));
 369     }
 370     return value;
 371 }
 372
 373 static int colo_do_checkpoint_transaction(MigrationState *s,
 374                                           QIOChannelBuffer *bioc,
 375                                           QEMUFile *fb)
 376 {
 377     Error *local_err = NULL;
 378     int ret = -1;
 379
 380     colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
 381                       &local_err);
 382     if (local_err) {
 383         goto out;
 384     }
 385
 386     colo_receive_check_message(s->rp_state.from_dst_file,
 387                     COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
 388     if (local_err) {
 389         goto out;
 390     }
 391     /* Reset channel-buffer directly */
 392     qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
 393     bioc->usage = 0;
 394
 395     qemu_mutex_lock_iothread();
 396     if (failover_get_state() != FAILOVER_STATUS_NONE) {
 397         qemu_mutex_unlock_iothread();
 398         goto out;
 399     }
 400     vm_stop_force_state(RUN_STATE_COLO);
 401     qemu_mutex_unlock_iothread();
 402     trace_colo_vm_state_change("run", "stop");
 403     /*
 404      * Failover request bh could be called after vm_stop_force_state(),
 405      * So we need check failover_request_is_active() again.
 406      */
 407     if (failover_get_state() != FAILOVER_STATUS_NONE) {
 408         goto out;
 409     }
 410
 411     colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
 412     if (local_err) {
 413         goto out;
 414     }
 415
 416     /* Disable block migration */
 417     migrate_set_block_enabled(false, &local_err);
 418     qemu_mutex_lock_iothread();
 419
 420 #ifdef CONFIG_REPLICATION
 421     replication_do_checkpoint_all(&local_err);
 422     if (local_err) {
 423         qemu_mutex_unlock_iothread();
 424         goto out;
 425     }
 426 #else
 427         abort();
 428 #endif
 429
 430     colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
 431     if (local_err) {
 432         qemu_mutex_unlock_iothread();
 433         goto out;
 434     }
 435     /* Note: device state is saved into buffer */
 436     ret = qemu_save_device_state(fb);
 437
 438     qemu_mutex_unlock_iothread();
 439     if (ret < 0) {
 440         goto out;
 441     }
 442     /*
 443      * Only save VM's live state, which not including device state.
 444      * TODO: We may need a timeout mechanism to prevent COLO process
 445      * to be blocked here.
 446      */
 447     qemu_savevm_live_state(s->to_dst_file);
 448
 449     qemu_fflush(fb);
 450
 451     /*
 452      * We need the size of the VMstate data in Secondary side,
 453      * With which we can decide how much data should be read.
 454      */
 455     colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
 456                             bioc->usage, &local_err);
 457     if (local_err) {
 458         goto out;
 459     }
 460
 461     qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
 462     qemu_fflush(s->to_dst_file);
 463     ret = qemu_file_get_error(s->to_dst_file);
 464     if (ret < 0) {
 465         goto out;
 466     }
 467
 468     colo_receive_check_message(s->rp_state.from_dst_file,
 469                        COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
 470     if (local_err) {
 471         goto out;
 472     }
 473
 474     colo_receive_check_message(s->rp_state.from_dst_file,
 475                        COLO_MESSAGE_VMSTATE_LOADED, &local_err);
 476     if (local_err) {
 477         goto out;
 478     }
 479
 480     ret = 0;
 481
 482     qemu_mutex_lock_iothread();
 483     vm_start();
 484     qemu_mutex_unlock_iothread();
 485     trace_colo_vm_state_change("stop", "run");
 486
 487 out:
 488     if (local_err) {
 489         error_report_err(local_err);
 490     }
 491     return ret;
 492 }
 493
 494 static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
 495 {
 496     colo_checkpoint_notify(data);
 497 }
 498
 499 static void colo_process_checkpoint(MigrationState *s)
 500 {
 501     QIOChannelBuffer *bioc;
 502     QEMUFile *fb = NULL;
 503     int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 504     Error *local_err = NULL;
 505     int ret;
 506
 507     failover_init_state();
 508
 509     s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
 510     if (!s->rp_state.from_dst_file) {
 511         error_report("Open QEMUFile from_dst_file failed");
 512         goto out;
 513     }
 514
 515     packets_compare_notifier.notify = colo_compare_notify_checkpoint;
 516     colo_compare_register_notifier(&packets_compare_notifier);
 517
 518     /*
 519      * Wait for Secondary finish loading VM states and enter COLO
 520      * restore.
 521      */
 522     colo_receive_check_message(s->rp_state.from_dst_file,
 523                        COLO_MESSAGE_CHECKPOINT_READY, &local_err);
 524     if (local_err) {
 525         goto out;
 526     }
 527     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
 528     fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
 529     object_unref(OBJECT(bioc));
 530
 531     qemu_mutex_lock_iothread();
 532 #ifdef CONFIG_REPLICATION
 533     replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
 534     if (local_err) {
 535         qemu_mutex_unlock_iothread();
 536         goto out;
 537     }
 538 #else
 539         abort();
 540 #endif
 541
 542     vm_start();
 543     qemu_mutex_unlock_iothread();
 544     trace_colo_vm_state_change("stop", "run");
 545
 546     timer_mod(s->colo_delay_timer,
 547             current_time + s->parameters.x_checkpoint_delay);
 548
 549     while (s->state == MIGRATION_STATUS_COLO) {
 550         if (failover_get_state() != FAILOVER_STATUS_NONE) {
 551             error_report("failover request");
 552             goto out;
 553         }
 554
 555         qemu_sem_wait(&s->colo_checkpoint_sem);
 556
 557         if (s->state != MIGRATION_STATUS_COLO) {
 558             goto out;
 559         }
 560         ret = colo_do_checkpoint_transaction(s, bioc, fb);
 561         if (ret < 0) {
 562             goto out;
 563         }
 564     }
 565
 566 out:
 567     /* Throw the unreported error message after exited from loop */
 568     if (local_err) {
 569         error_report_err(local_err);
 570     }
 571
 572     if (fb) {
 573         qemu_fclose(fb);
 574     }
 575
 576     /*
 577      * There are only two reasons we can get here, some error happened
 578      * or the user triggered failover.
 579      */
 580     switch (failover_get_state()) {
 581     case FAILOVER_STATUS_NONE:
 582         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
 583                                   COLO_EXIT_REASON_ERROR);
 584         break;
 585     case FAILOVER_STATUS_REQUIRE:
 586         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
 587                                   COLO_EXIT_REASON_REQUEST);
 588         break;
 589     default:
 590         abort();
 591     }
 592
 593     /* Hope this not to be too long to wait here */
 594     qemu_sem_wait(&s->colo_exit_sem);
 595     qemu_sem_destroy(&s->colo_exit_sem);
 596
 597     /*
 598      * It is safe to unregister notifier after failover finished.
 599      * Besides, colo_delay_timer and colo_checkpoint_sem can't be
 600      * released befor unregister notifier, or there will be use-after-free
 601      * error.
 602      */
 603     colo_compare_unregister_notifier(&packets_compare_notifier);
 604     timer_del(s->colo_delay_timer);
 605     timer_free(s->colo_delay_timer);
 606     qemu_sem_destroy(&s->colo_checkpoint_sem);
 607
 608     /*
 609      * Must be called after failover BH is completed,
 610      * Or the failover BH may shutdown the wrong fd that
 611      * re-used by other threads after we release here.
 612      */
 613     if (s->rp_state.from_dst_file) {
 614         qemu_fclose(s->rp_state.from_dst_file);
 615     }
 616 }
 617
 618 void colo_checkpoint_notify(void *opaque)
 619 {
 620     MigrationState *s = opaque;
 621     int64_t next_notify_time;
 622
 623     qemu_sem_post(&s->colo_checkpoint_sem);
 624     s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 625     next_notify_time = s->colo_checkpoint_time +
 626                     s->parameters.x_checkpoint_delay;
 627     timer_mod(s->colo_delay_timer, next_notify_time);
 628 }
 629
 630 void migrate_start_colo_process(MigrationState *s)
 631 {
 632     qemu_mutex_unlock_iothread();
 633     qemu_sem_init(&s->colo_checkpoint_sem, 0);
 634     s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
 635                                 colo_checkpoint_notify, s);
 636
 637     qemu_sem_init(&s->colo_exit_sem, 0);
 638     migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
 639                       MIGRATION_STATUS_COLO);
 640     colo_process_checkpoint(s);
 641     qemu_mutex_lock_iothread();
 642 }
 643
 644 static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request,
 645                                      Error **errp)
 646 {
 647     COLOMessage msg;
 648     Error *local_err = NULL;
 649
 650     msg = colo_receive_message(f, &local_err);
 651     if (local_err) {
 652         error_propagate(errp, local_err);
 653         return;
 654     }
 655
 656     switch (msg) {
 657     case COLO_MESSAGE_CHECKPOINT_REQUEST:
 658         *checkpoint_request = 1;
 659         break;
 660     default:
 661         *checkpoint_request = 0;
 662         error_setg(errp, "Got unknown COLO message: %d", msg);
 663         break;
 664     }
 665 }
 666
 667 void *colo_process_incoming_thread(void *opaque)
 668 {
 669     MigrationIncomingState *mis = opaque;
 670     QEMUFile *fb = NULL;
 671     QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
 672     uint64_t total_size;
 673     uint64_t value;
 674     Error *local_err = NULL;
 675     int ret;
 676
 677     rcu_register_thread();
 678     qemu_sem_init(&mis->colo_incoming_sem, 0);
 679
 680     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
 681                       MIGRATION_STATUS_COLO);
 682
 683     failover_init_state();
 684
 685     mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
 686     if (!mis->to_src_file) {
 687         error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
 688         goto out;
 689     }
 690     /*
 691      * Note: the communication between Primary side and Secondary side
 692      * should be sequential, we set the fd to unblocked in migration incoming
 693      * coroutine, and here we are in the COLO incoming thread, so it is ok to
 694      * set the fd back to blocked.
 695      */
 696     qemu_file_set_blocking(mis->from_src_file, true);
 697
 698     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
 699     fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
 700     object_unref(OBJECT(bioc));
 701
 702     qemu_mutex_lock_iothread();
 703 #ifdef CONFIG_REPLICATION
 704     replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
 705     if (local_err) {
 706         qemu_mutex_unlock_iothread();
 707         goto out;
 708     }
 709 #else
 710         abort();
 711 #endif
 712     vm_start();
 713     trace_colo_vm_state_change("stop", "run");
 714     qemu_mutex_unlock_iothread();
 715
 716     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
 717                       &local_err);
 718     if (local_err) {
 719         goto out;
 720     }
 721
 722     while (mis->state == MIGRATION_STATUS_COLO) {
 723         int request = 0;
 724
 725         colo_wait_handle_message(mis->from_src_file, &request, &local_err);
 726         if (local_err) {
 727             goto out;
 728         }
 729         assert(request);
 730         if (failover_get_state() != FAILOVER_STATUS_NONE) {
 731             error_report("failover request");
 732             goto out;
 733         }
 734
 735         qemu_mutex_lock_iothread();
 736         vm_stop_force_state(RUN_STATE_COLO);
 737         trace_colo_vm_state_change("run", "stop");
 738         qemu_mutex_unlock_iothread();
 739
 740         /* FIXME: This is unnecessary for periodic checkpoint mode */
 741         colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
 742                      &local_err);
 743         if (local_err) {
 744             goto out;
 745         }
 746
 747         colo_receive_check_message(mis->from_src_file,
 748                            COLO_MESSAGE_VMSTATE_SEND, &local_err);
 749         if (local_err) {
 750             goto out;
 751         }
 752
 753         qemu_mutex_lock_iothread();
 754         cpu_synchronize_all_pre_loadvm();
 755         ret = qemu_loadvm_state_main(mis->from_src_file, mis);
 756         qemu_mutex_unlock_iothread();
 757
 758         if (ret < 0) {
 759             error_report("Load VM's live state (ram) error");
 760             goto out;
 761         }
 762
 763         value = colo_receive_message_value(mis->from_src_file,
 764                                  COLO_MESSAGE_VMSTATE_SIZE, &local_err);
 765         if (local_err) {
 766             goto out;
 767         }
 768
 769         /*
 770          * Read VM device state data into channel buffer,
 771          * It's better to re-use the memory allocated.
 772          * Here we need to handle the channel buffer directly.
 773          */
 774         if (value > bioc->capacity) {
 775             bioc->capacity = value;
 776             bioc->data = g_realloc(bioc->data, bioc->capacity);
 777         }
 778         total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
 779         if (total_size != value) {
 780             error_report("Got %" PRIu64 " VMState data, less than expected"
 781                         " %" PRIu64, total_size, value);
 782             goto out;
 783         }
 784         bioc->usage = total_size;
 785         qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
 786
 787         colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
 788                      &local_err);
 789         if (local_err) {
 790             goto out;
 791         }
 792
 793         qemu_mutex_lock_iothread();
 794         vmstate_loading = true;
 795         ret = qemu_load_device_state(fb);
 796         if (ret < 0) {
 797             error_report("COLO: load device state failed");
 798             qemu_mutex_unlock_iothread();
 799             goto out;
 800         }
 801
 802 #ifdef CONFIG_REPLICATION
 803         replication_get_error_all(&local_err);
 804         if (local_err) {
 805             qemu_mutex_unlock_iothread();
 806             goto out;
 807         }
 808
 809         /* discard colo disk buffer */
 810         replication_do_checkpoint_all(&local_err);
 811         if (local_err) {
 812             qemu_mutex_unlock_iothread();
 813             goto out;
 814         }
 815 #else
 816         abort();
 817 #endif
 818         /* Notify all filters of all NIC to do checkpoint */
 819         colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
 820
 821         if (local_err) {
 822             qemu_mutex_unlock_iothread();
 823             goto out;
 824         }
 825
 826         vmstate_loading = false;
 827         vm_start();
 828         trace_colo_vm_state_change("stop", "run");
 829         qemu_mutex_unlock_iothread();
 830
 831         if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
 832             failover_set_state(FAILOVER_STATUS_RELAUNCH,
 833                             FAILOVER_STATUS_NONE);
 834             failover_request_active(NULL);
 835             goto out;
 836         }
 837
 838         colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
 839                      &local_err);
 840         if (local_err) {
 841             goto out;
 842         }
 843     }
 844
 845 out:
 846     vmstate_loading = false;
 847     /* Throw the unreported error message after exited from loop */
 848     if (local_err) {
 849         error_report_err(local_err);
 850     }
 851
 852     switch (failover_get_state()) {
 853     case FAILOVER_STATUS_NONE:
 854         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
 855                                   COLO_EXIT_REASON_ERROR);
 856         break;
 857     case FAILOVER_STATUS_REQUIRE:
 858         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
 859                                   COLO_EXIT_REASON_REQUEST);
 860         break;
 861     default:
 862         abort();
 863     }
 864
 865     if (fb) {
 866         qemu_fclose(fb);
 867     }
 868
 869     /* Hope this not to be too long to loop here */
 870     qemu_sem_wait(&mis->colo_incoming_sem);
 871     qemu_sem_destroy(&mis->colo_incoming_sem);
 872     /* Must be called after failover BH is completed */
 873     if (mis->to_src_file) {
 874         qemu_fclose(mis->to_src_file);
 875         mis->to_src_file = NULL;
 876     }
 877
 878     rcu_unregister_thread();
 879     return NULL;
 880 }