migration/colo.c

   1 /*
   2  * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
   3  * (a.k.a. Fault Tolerance or Continuous Replication)
   4  *
   5  * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
   6  * Copyright (c) 2016 FUJITSU LIMITED
   7  * Copyright (c) 2016 Intel Corporation
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2 or
  10  * later.  See the COPYING file in the top-level directory.
  11  */
  12
  13 #include "qemu/osdep.h"
  14 #include "sysemu/sysemu.h"
  15 #include "qapi/error.h"
  16 #include "qapi/qapi-commands-migration.h"
  17 #include "qemu-file-channel.h"
  18 #include "migration.h"
  19 #include "qemu-file.h"
  20 #include "savevm.h"
  21 #include "migration/colo.h"
  22 #include "block.h"
  23 #include "io/channel-buffer.h"
  24 #include "trace.h"
  25 #include "qemu/error-report.h"
  26 #include "qemu/main-loop.h"
  27 #include "qemu/rcu.h"
  28 #include "migration/failover.h"
  29 #ifdef CONFIG_REPLICATION
  30 #include "replication.h"
  31 #endif
  32 #include "net/colo-compare.h"
  33 #include "net/colo.h"
  34 #include "block/block.h"
  35 #include "qapi/qapi-events-migration.h"
  36 #include "qapi/qmp/qerror.h"
  37 #include "sysemu/cpus.h"
  38 #include "sysemu/runstate.h"
  39 #include "net/filter.h"
  40
/*
 * True while colo_incoming_process_checkpoint() is loading device state on
 * the Secondary; secondary_vm_do_failover() checks it and defers the
 * failover (FAILOVER_STATUS_RELAUNCH) instead of running it immediately.
 */
static bool vmstate_loading;
/*
 * Notifier registered with colo-compare by colo_process_checkpoint(); its
 * callback is colo_compare_notify_checkpoint().
 */
static Notifier packets_compare_notifier;

/*
 * Users need to know the COLO mode after a COLO failover: this records the
 * mode seen when COLO was entered and is reported by qmp_query_colo_status().
 */
static COLOMode last_colo_mode;

/* Initial capacity (4 MiB) of the channel buffer that caches device state */
#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
  48
  49 bool migration_in_colo_state(void)
  50 {
  51     MigrationState *s = migrate_get_current();
  52
  53     return (s->state == MIGRATION_STATUS_COLO);
  54 }
  55
  56 bool migration_incoming_in_colo_state(void)
  57 {
  58     MigrationIncomingState *mis = migration_incoming_get_current();
  59
  60     return mis && (mis->state == MIGRATION_STATUS_COLO);
  61 }
  62
  63 static bool colo_runstate_is_stopped(void)
  64 {
  65     return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
  66 }
  67
  68 static void secondary_vm_do_failover(void)
  69 {
  70 /* COLO needs enable block-replication */
  71 #ifdef CONFIG_REPLICATION
  72     int old_state;
  73     MigrationIncomingState *mis = migration_incoming_get_current();
  74     Error *local_err = NULL;
  75
  76     /* Can not do failover during the process of VM's loading VMstate, Or
  77      * it will break the secondary VM.
  78      */
  79     if (vmstate_loading) {
  80         old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
  81                         FAILOVER_STATUS_RELAUNCH);
  82         if (old_state != FAILOVER_STATUS_ACTIVE) {
  83             error_report("Unknown error while do failover for secondary VM,"
  84                          "old_state: %s", FailoverStatus_str(old_state));
  85         }
  86         return;
  87     }
  88
  89     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
  90                       MIGRATION_STATUS_COMPLETED);
  91
  92     replication_stop_all(true, &local_err);
  93     if (local_err) {
  94         error_report_err(local_err);
  95     }
  96
  97     /* Notify all filters of all NIC to do checkpoint */
  98     colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
  99     if (local_err) {
 100         error_report_err(local_err);
 101     }
 102
 103     if (!autostart) {
 104         error_report("\"-S\" qemu option will be ignored in secondary side");
 105         /* recover runstate to normal migration finish state */
 106         autostart = true;
 107     }
 108     /*
 109      * Make sure COLO incoming thread not block in recv or send,
 110      * If mis->from_src_file and mis->to_src_file use the same fd,
 111      * The second shutdown() will return -1, we ignore this value,
 112      * It is harmless.
 113      */
 114     if (mis->from_src_file) {
 115         qemu_file_shutdown(mis->from_src_file);
 116     }
 117     if (mis->to_src_file) {
 118         qemu_file_shutdown(mis->to_src_file);
 119     }
 120
 121     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
 122                                    FAILOVER_STATUS_COMPLETED);
 123     if (old_state != FAILOVER_STATUS_ACTIVE) {
 124         error_report("Incorrect state (%s) while doing failover for "
 125                      "secondary VM", FailoverStatus_str(old_state));
 126         return;
 127     }
 128     /* Notify COLO incoming thread that failover work is finished */
 129     qemu_sem_post(&mis->colo_incoming_sem);
 130
 131     /* For Secondary VM, jump to incoming co */
 132     if (mis->migration_incoming_co) {
 133         qemu_coroutine_enter(mis->migration_incoming_co);
 134     }
 135 #else
 136     abort();
 137 #endif
 138 }
 139
 140 static void primary_vm_do_failover(void)
 141 {
 142 #ifdef CONFIG_REPLICATION
 143     MigrationState *s = migrate_get_current();
 144     int old_state;
 145     Error *local_err = NULL;
 146
 147     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
 148                       MIGRATION_STATUS_COMPLETED);
 149     /*
 150      * kick COLO thread which might wait at
 151      * qemu_sem_wait(&s->colo_checkpoint_sem).
 152      */
 153     colo_checkpoint_notify(migrate_get_current());
 154
 155     /*
 156      * Wake up COLO thread which may blocked in recv() or send(),
 157      * The s->rp_state.from_dst_file and s->to_dst_file may use the
 158      * same fd, but we still shutdown the fd for twice, it is harmless.
 159      */
 160     if (s->to_dst_file) {
 161         qemu_file_shutdown(s->to_dst_file);
 162     }
 163     if (s->rp_state.from_dst_file) {
 164         qemu_file_shutdown(s->rp_state.from_dst_file);
 165     }
 166
 167     old_state = failover_set_state(FAILOVER_STATUS_ACTIVE,
 168                                    FAILOVER_STATUS_COMPLETED);
 169     if (old_state != FAILOVER_STATUS_ACTIVE) {
 170         error_report("Incorrect state (%s) while doing failover for Primary VM",
 171                      FailoverStatus_str(old_state));
 172         return;
 173     }
 174
 175     replication_stop_all(true, &local_err);
 176     if (local_err) {
 177         error_report_err(local_err);
 178         local_err = NULL;
 179     }
 180
 181     /* Notify COLO thread that failover work is finished */
 182     qemu_sem_post(&s->colo_exit_sem);
 183 #else
 184     abort();
 185 #endif
 186 }
 187
 188 COLOMode get_colo_mode(void)
 189 {
 190     if (migration_in_colo_state()) {
 191         return COLO_MODE_PRIMARY;
 192     } else if (migration_incoming_in_colo_state()) {
 193         return COLO_MODE_SECONDARY;
 194     } else {
 195         return COLO_MODE_NONE;
 196     }
 197 }
 198
 199 void colo_do_failover(void)
 200 {
 201     /* Make sure VM stopped while failover happened. */
 202     if (!colo_runstate_is_stopped()) {
 203         vm_stop_force_state(RUN_STATE_COLO);
 204     }
 205
 206     switch (get_colo_mode()) {
 207     case COLO_MODE_PRIMARY:
 208         primary_vm_do_failover();
 209         break;
 210     case COLO_MODE_SECONDARY:
 211         secondary_vm_do_failover();
 212         break;
 213     default:
 214         error_report("colo_do_failover failed because the colo mode"
 215                      " could not be obtained");
 216     }
 217 }
 218
 219 #ifdef CONFIG_REPLICATION
 220 void qmp_xen_set_replication(bool enable, bool primary,
 221                              bool has_failover, bool failover,
 222                              Error **errp)
 223 {
 224     ReplicationMode mode = primary ?
 225                            REPLICATION_MODE_PRIMARY :
 226                            REPLICATION_MODE_SECONDARY;
 227
 228     if (has_failover && enable) {
 229         error_setg(errp, "Parameter 'failover' is only for"
 230                    " stopping replication");
 231         return;
 232     }
 233
 234     if (enable) {
 235         replication_start_all(mode, errp);
 236     } else {
 237         if (!has_failover) {
 238             failover = NULL;
 239         }
 240         replication_stop_all(failover, failover ? NULL : errp);
 241     }
 242 }
 243
 244 ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
 245 {
 246     Error *err = NULL;
 247     ReplicationStatus *s = g_new0(ReplicationStatus, 1);
 248
 249     replication_get_error_all(&err);
 250     if (err) {
 251         s->error = true;
 252         s->has_desc = true;
 253         s->desc = g_strdup(error_get_pretty(err));
 254     } else {
 255         s->error = false;
 256     }
 257
 258     error_free(err);
 259     return s;
 260 }
 261
 262 void qmp_xen_colo_do_checkpoint(Error **errp)
 263 {
 264     replication_do_checkpoint_all(errp);
 265     /* Notify all filters of all NIC to do checkpoint */
 266     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
 267 }
 268 #endif
 269
 270 COLOStatus *qmp_query_colo_status(Error **errp)
 271 {
 272     COLOStatus *s = g_new0(COLOStatus, 1);
 273
 274     s->mode = get_colo_mode();
 275     s->last_mode = last_colo_mode;
 276
 277     switch (failover_get_state()) {
 278     case FAILOVER_STATUS_NONE:
 279         s->reason = COLO_EXIT_REASON_NONE;
 280         break;
 281     case FAILOVER_STATUS_COMPLETED:
 282         s->reason = COLO_EXIT_REASON_REQUEST;
 283         break;
 284     default:
 285         if (migration_in_colo_state()) {
 286             s->reason = COLO_EXIT_REASON_PROCESSING;
 287         } else {
 288             s->reason = COLO_EXIT_REASON_ERROR;
 289         }
 290     }
 291
 292     return s;
 293 }
 294
 295 static void colo_send_message(QEMUFile *f, COLOMessage msg,
 296                               Error **errp)
 297 {
 298     int ret;
 299
 300     if (msg >= COLO_MESSAGE__MAX) {
 301         error_setg(errp, "%s: Invalid message", __func__);
 302         return;
 303     }
 304     qemu_put_be32(f, msg);
 305     qemu_fflush(f);
 306
 307     ret = qemu_file_get_error(f);
 308     if (ret < 0) {
 309         error_setg_errno(errp, -ret, "Can't send COLO message");
 310     }
 311     trace_colo_send_message(COLOMessage_str(msg));
 312 }
 313
 314 static void colo_send_message_value(QEMUFile *f, COLOMessage msg,
 315                                     uint64_t value, Error **errp)
 316 {
 317     Error *local_err = NULL;
 318     int ret;
 319
 320     colo_send_message(f, msg, &local_err);
 321     if (local_err) {
 322         error_propagate(errp, local_err);
 323         return;
 324     }
 325     qemu_put_be64(f, value);
 326     qemu_fflush(f);
 327
 328     ret = qemu_file_get_error(f);
 329     if (ret < 0) {
 330         error_setg_errno(errp, -ret, "Failed to send value for message:%s",
 331                          COLOMessage_str(msg));
 332     }
 333 }
 334
 335 static COLOMessage colo_receive_message(QEMUFile *f, Error **errp)
 336 {
 337     COLOMessage msg;
 338     int ret;
 339
 340     msg = qemu_get_be32(f);
 341     ret = qemu_file_get_error(f);
 342     if (ret < 0) {
 343         error_setg_errno(errp, -ret, "Can't receive COLO message");
 344         return msg;
 345     }
 346     if (msg >= COLO_MESSAGE__MAX) {
 347         error_setg(errp, "%s: Invalid message", __func__);
 348         return msg;
 349     }
 350     trace_colo_receive_message(COLOMessage_str(msg));
 351     return msg;
 352 }
 353
 354 static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg,
 355                                        Error **errp)
 356 {
 357     COLOMessage msg;
 358     Error *local_err = NULL;
 359
 360     msg = colo_receive_message(f, &local_err);
 361     if (local_err) {
 362         error_propagate(errp, local_err);
 363         return;
 364     }
 365     if (msg != expect_msg) {
 366         error_setg(errp, "Unexpected COLO message %d, expected %d",
 367                           msg, expect_msg);
 368     }
 369 }
 370
 371 static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg,
 372                                            Error **errp)
 373 {
 374     Error *local_err = NULL;
 375     uint64_t value;
 376     int ret;
 377
 378     colo_receive_check_message(f, expect_msg, &local_err);
 379     if (local_err) {
 380         error_propagate(errp, local_err);
 381         return 0;
 382     }
 383
 384     value = qemu_get_be64(f);
 385     ret = qemu_file_get_error(f);
 386     if (ret < 0) {
 387         error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s",
 388                          COLOMessage_str(expect_msg));
 389     }
 390     return value;
 391 }
 392
 393 static int colo_do_checkpoint_transaction(MigrationState *s,
 394                                           QIOChannelBuffer *bioc,
 395                                           QEMUFile *fb)
 396 {
 397     Error *local_err = NULL;
 398     int ret = -1;
 399
 400     colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST,
 401                       &local_err);
 402     if (local_err) {
 403         goto out;
 404     }
 405
 406     colo_receive_check_message(s->rp_state.from_dst_file,
 407                     COLO_MESSAGE_CHECKPOINT_REPLY, &local_err);
 408     if (local_err) {
 409         goto out;
 410     }
 411     /* Reset channel-buffer directly */
 412     qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
 413     bioc->usage = 0;
 414
 415     qemu_mutex_lock_iothread();
 416     if (failover_get_state() != FAILOVER_STATUS_NONE) {
 417         qemu_mutex_unlock_iothread();
 418         goto out;
 419     }
 420     vm_stop_force_state(RUN_STATE_COLO);
 421     qemu_mutex_unlock_iothread();
 422     trace_colo_vm_state_change("run", "stop");
 423     /*
 424      * Failover request bh could be called after vm_stop_force_state(),
 425      * So we need check failover_request_is_active() again.
 426      */
 427     if (failover_get_state() != FAILOVER_STATUS_NONE) {
 428         goto out;
 429     }
 430
 431     colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
 432     if (local_err) {
 433         goto out;
 434     }
 435
 436     /* Disable block migration */
 437     migrate_set_block_enabled(false, &local_err);
 438     qemu_mutex_lock_iothread();
 439
 440 #ifdef CONFIG_REPLICATION
 441     replication_do_checkpoint_all(&local_err);
 442     if (local_err) {
 443         qemu_mutex_unlock_iothread();
 444         goto out;
 445     }
 446 #else
 447         abort();
 448 #endif
 449
 450     colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
 451     if (local_err) {
 452         qemu_mutex_unlock_iothread();
 453         goto out;
 454     }
 455     /* Note: device state is saved into buffer */
 456     ret = qemu_save_device_state(fb);
 457
 458     qemu_mutex_unlock_iothread();
 459     if (ret < 0) {
 460         goto out;
 461     }
 462     /*
 463      * Only save VM's live state, which not including device state.
 464      * TODO: We may need a timeout mechanism to prevent COLO process
 465      * to be blocked here.
 466      */
 467     qemu_savevm_live_state(s->to_dst_file);
 468
 469     qemu_fflush(fb);
 470
 471     /*
 472      * We need the size of the VMstate data in Secondary side,
 473      * With which we can decide how much data should be read.
 474      */
 475     colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE,
 476                             bioc->usage, &local_err);
 477     if (local_err) {
 478         goto out;
 479     }
 480
 481     qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage);
 482     qemu_fflush(s->to_dst_file);
 483     ret = qemu_file_get_error(s->to_dst_file);
 484     if (ret < 0) {
 485         goto out;
 486     }
 487
 488     colo_receive_check_message(s->rp_state.from_dst_file,
 489                        COLO_MESSAGE_VMSTATE_RECEIVED, &local_err);
 490     if (local_err) {
 491         goto out;
 492     }
 493
 494     colo_receive_check_message(s->rp_state.from_dst_file,
 495                        COLO_MESSAGE_VMSTATE_LOADED, &local_err);
 496     if (local_err) {
 497         goto out;
 498     }
 499
 500     ret = 0;
 501
 502     qemu_mutex_lock_iothread();
 503     vm_start();
 504     qemu_mutex_unlock_iothread();
 505     trace_colo_vm_state_change("stop", "run");
 506
 507 out:
 508     if (local_err) {
 509         error_report_err(local_err);
 510     }
 511     return ret;
 512 }
 513
 514 static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
 515 {
 516     colo_checkpoint_notify(data);
 517 }
 518
 519 static void colo_process_checkpoint(MigrationState *s)
 520 {
 521     QIOChannelBuffer *bioc;
 522     QEMUFile *fb = NULL;
 523     int64_t current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 524     Error *local_err = NULL;
 525     int ret;
 526
 527     last_colo_mode = get_colo_mode();
 528     if (last_colo_mode != COLO_MODE_PRIMARY) {
 529         error_report("COLO mode must be COLO_MODE_PRIMARY");
 530         return;
 531     }
 532
 533     failover_init_state();
 534
 535     s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
 536     if (!s->rp_state.from_dst_file) {
 537         error_report("Open QEMUFile from_dst_file failed");
 538         goto out;
 539     }
 540
 541     packets_compare_notifier.notify = colo_compare_notify_checkpoint;
 542     colo_compare_register_notifier(&packets_compare_notifier);
 543
 544     /*
 545      * Wait for Secondary finish loading VM states and enter COLO
 546      * restore.
 547      */
 548     colo_receive_check_message(s->rp_state.from_dst_file,
 549                        COLO_MESSAGE_CHECKPOINT_READY, &local_err);
 550     if (local_err) {
 551         goto out;
 552     }
 553     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
 554     fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
 555     object_unref(OBJECT(bioc));
 556
 557     qemu_mutex_lock_iothread();
 558 #ifdef CONFIG_REPLICATION
 559     replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
 560     if (local_err) {
 561         qemu_mutex_unlock_iothread();
 562         goto out;
 563     }
 564 #else
 565         abort();
 566 #endif
 567
 568     vm_start();
 569     qemu_mutex_unlock_iothread();
 570     trace_colo_vm_state_change("stop", "run");
 571
 572     timer_mod(s->colo_delay_timer,
 573             current_time + s->parameters.x_checkpoint_delay);
 574
 575     while (s->state == MIGRATION_STATUS_COLO) {
 576         if (failover_get_state() != FAILOVER_STATUS_NONE) {
 577             error_report("failover request");
 578             goto out;
 579         }
 580
 581         qemu_sem_wait(&s->colo_checkpoint_sem);
 582
 583         if (s->state != MIGRATION_STATUS_COLO) {
 584             goto out;
 585         }
 586         ret = colo_do_checkpoint_transaction(s, bioc, fb);
 587         if (ret < 0) {
 588             goto out;
 589         }
 590     }
 591
 592 out:
 593     /* Throw the unreported error message after exited from loop */
 594     if (local_err) {
 595         error_report_err(local_err);
 596     }
 597
 598     if (fb) {
 599         qemu_fclose(fb);
 600     }
 601
 602     /*
 603      * There are only two reasons we can get here, some error happened
 604      * or the user triggered failover.
 605      */
 606     switch (failover_get_state()) {
 607     case FAILOVER_STATUS_COMPLETED:
 608         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
 609                                   COLO_EXIT_REASON_REQUEST);
 610         break;
 611     default:
 612         qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
 613                                   COLO_EXIT_REASON_ERROR);
 614     }
 615
 616     /* Hope this not to be too long to wait here */
 617     qemu_sem_wait(&s->colo_exit_sem);
 618     qemu_sem_destroy(&s->colo_exit_sem);
 619
 620     /*
 621      * It is safe to unregister notifier after failover finished.
 622      * Besides, colo_delay_timer and colo_checkpoint_sem can't be
 623      * released befor unregister notifier, or there will be use-after-free
 624      * error.
 625      */
 626     colo_compare_unregister_notifier(&packets_compare_notifier);
 627     timer_del(s->colo_delay_timer);
 628     timer_free(s->colo_delay_timer);
 629     qemu_sem_destroy(&s->colo_checkpoint_sem);
 630
 631     /*
 632      * Must be called after failover BH is completed,
 633      * Or the failover BH may shutdown the wrong fd that
 634      * re-used by other threads after we release here.
 635      */
 636     if (s->rp_state.from_dst_file) {
 637         qemu_fclose(s->rp_state.from_dst_file);
 638     }
 639 }
 640
 641 void colo_checkpoint_notify(void *opaque)
 642 {
 643     MigrationState *s = opaque;
 644     int64_t next_notify_time;
 645
 646     qemu_sem_post(&s->colo_checkpoint_sem);
 647     s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 648     next_notify_time = s->colo_checkpoint_time +
 649                     s->parameters.x_checkpoint_delay;
 650     timer_mod(s->colo_delay_timer, next_notify_time);
 651 }
 652
 653 void migrate_start_colo_process(MigrationState *s)
 654 {
 655     qemu_mutex_unlock_iothread();
 656     qemu_sem_init(&s->colo_checkpoint_sem, 0);
 657     s->colo_delay_timer =  timer_new_ms(QEMU_CLOCK_HOST,
 658                                 colo_checkpoint_notify, s);
 659
 660     qemu_sem_init(&s->colo_exit_sem, 0);
 661     migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
 662                       MIGRATION_STATUS_COLO);
 663     colo_process_checkpoint(s);
 664     qemu_mutex_lock_iothread();
 665 }
 666
 667 static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
 668                       QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
 669 {
 670     uint64_t total_size;
 671     uint64_t value;
 672     Error *local_err = NULL;
 673     int ret;
 674
 675     qemu_mutex_lock_iothread();
 676     vm_stop_force_state(RUN_STATE_COLO);
 677     trace_colo_vm_state_change("run", "stop");
 678     qemu_mutex_unlock_iothread();
 679
 680     /* FIXME: This is unnecessary for periodic checkpoint mode */
 681     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
 682                  &local_err);
 683     if (local_err) {
 684         error_propagate(errp, local_err);
 685         return;
 686     }
 687
 688     colo_receive_check_message(mis->from_src_file,
 689                        COLO_MESSAGE_VMSTATE_SEND, &local_err);
 690     if (local_err) {
 691         error_propagate(errp, local_err);
 692         return;
 693     }
 694
 695     qemu_mutex_lock_iothread();
 696     cpu_synchronize_all_pre_loadvm();
 697     ret = qemu_loadvm_state_main(mis->from_src_file, mis);
 698     qemu_mutex_unlock_iothread();
 699
 700     if (ret < 0) {
 701         error_setg(errp, "Load VM's live state (ram) error");
 702         return;
 703     }
 704
 705     value = colo_receive_message_value(mis->from_src_file,
 706                              COLO_MESSAGE_VMSTATE_SIZE, &local_err);
 707     if (local_err) {
 708         error_propagate(errp, local_err);
 709         return;
 710     }
 711
 712     /*
 713      * Read VM device state data into channel buffer,
 714      * It's better to re-use the memory allocated.
 715      * Here we need to handle the channel buffer directly.
 716      */
 717     if (value > bioc->capacity) {
 718         bioc->capacity = value;
 719         bioc->data = g_realloc(bioc->data, bioc->capacity);
 720     }
 721     total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value);
 722     if (total_size != value) {
 723         error_setg(errp, "Got %" PRIu64 " VMState data, less than expected"
 724                     " %" PRIu64, total_size, value);
 725         return;
 726     }
 727     bioc->usage = total_size;
 728     qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL);
 729
 730     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED,
 731                  &local_err);
 732     if (local_err) {
 733         error_propagate(errp, local_err);
 734         return;
 735     }
 736
 737     qemu_mutex_lock_iothread();
 738     vmstate_loading = true;
 739     ret = qemu_load_device_state(fb);
 740     if (ret < 0) {
 741         error_setg(errp, "COLO: load device state failed");
 742         qemu_mutex_unlock_iothread();
 743         return;
 744     }
 745
 746 #ifdef CONFIG_REPLICATION
 747     replication_get_error_all(&local_err);
 748     if (local_err) {
 749         error_propagate(errp, local_err);
 750         qemu_mutex_unlock_iothread();
 751         return;
 752     }
 753
 754     /* discard colo disk buffer */
 755     replication_do_checkpoint_all(&local_err);
 756     if (local_err) {
 757         error_propagate(errp, local_err);
 758         qemu_mutex_unlock_iothread();
 759         return;
 760     }
 761 #else
 762     abort();
 763 #endif
 764     /* Notify all filters of all NIC to do checkpoint */
 765     colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
 766
 767     if (local_err) {
 768         error_propagate(errp, local_err);
 769         qemu_mutex_unlock_iothread();
 770         return;
 771     }
 772
 773     vmstate_loading = false;
 774     vm_start();
 775     trace_colo_vm_state_change("stop", "run");
 776     qemu_mutex_unlock_iothread();
 777
 778     if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
 779         failover_set_state(FAILOVER_STATUS_RELAUNCH,
 780                         FAILOVER_STATUS_NONE);
 781         failover_request_active(NULL);
 782         return;
 783     }
 784
 785     colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED,
 786                  &local_err);
 787     if (local_err) {
 788         error_propagate(errp, local_err);
 789     }
 790 }
 791
 792 static void colo_wait_handle_message(MigrationIncomingState *mis,
 793                 QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
 794 {
 795     COLOMessage msg;
 796     Error *local_err = NULL;
 797
 798     msg = colo_receive_message(mis->from_src_file, &local_err);
 799     if (local_err) {
 800         error_propagate(errp, local_err);
 801         return;
 802     }
 803
 804     switch (msg) {
 805     case COLO_MESSAGE_CHECKPOINT_REQUEST:
 806         colo_incoming_process_checkpoint(mis, fb, bioc, errp);
 807         break;
 808     default:
 809         error_setg(errp, "Got unknown COLO message: %d", msg);
 810         break;
 811     }
 812 }
 813
 814 void *colo_process_incoming_thread(void *opaque)
 815 {
 816     MigrationIncomingState *mis = opaque;
 817     QEMUFile *fb = NULL;
 818     QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */
 819     Error *local_err = NULL;
 820
 821     rcu_register_thread();
 822     qemu_sem_init(&mis->colo_incoming_sem, 0);
 823
 824     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
 825                       MIGRATION_STATUS_COLO);
 826
 827     last_colo_mode = get_colo_mode();
 828     if (last_colo_mode != COLO_MODE_SECONDARY) {
 829         error_report("COLO mode must be COLO_MODE_SECONDARY");
 830         return NULL;
 831     }
 832
 833     failover_init_state();
 834
 835     mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
 836     if (!mis->to_src_file) {
 837         error_report("COLO incoming thread: Open QEMUFile to_src_file failed");
 838         goto out;
 839     }
 840     /*
 841      * Note: the communication between Primary side and Secondary side
 842      * should be sequential, we set the fd to unblocked in migration incoming
 843      * coroutine, and here we are in the COLO incoming thread, so it is ok to
 844      * set the fd back to blocked.
 845      */
 846     qemu_file_set_blocking(mis->from_src_file, true);
 847
 848     bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
 849     fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
 850     object_unref(OBJECT(bioc));
 851
 852     qemu_mutex_lock_iothread();
 853 #ifdef CONFIG_REPLICATION
 854     replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
 855     if (local_err) {
 856         qemu_mutex_unlock_iothread();
 857         goto out;
 858     }
 859 #else
 860         abort();
 861 #endif
 862     vm_start();
 863     trace_colo_vm_state_change("stop", "run");
 864     qemu_mutex_unlock_iothread();
 865
 866     colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
 867                       &local_err);
 868     if (local_err) {
 869         goto out;
 870     }
 871
 872     while (mis->state == MIGRATION_STATUS_COLO) {
 873         colo_wait_handle_message(mis, fb, bioc, &local_err);
 874         if (local_err) {
 875             error_report_err(local_err);
 876             break;
 877         }
 878         if (failover_get_state() != FAILOVER_STATUS_NONE) {
 879             error_report("failover request");
 880             break;
 881         }
 882     }
 883
 884 out:
 885     vmstate_loading = false;
 886
 887     /*
 888      * There are only two reasons we can get here, some error happened
 889      * or the user triggered failover.
 890      */
 891     switch (failover_get_state()) {
 892     case FAILOVER_STATUS_COMPLETED:
 893         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
 894                                   COLO_EXIT_REASON_REQUEST);
 895         break;
 896     default:
 897         qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
 898                                   COLO_EXIT_REASON_ERROR);
 899     }
 900
 901     if (fb) {
 902         qemu_fclose(fb);
 903     }
 904
 905     /* Hope this not to be too long to loop here */
 906     qemu_sem_wait(&mis->colo_incoming_sem);
 907     qemu_sem_destroy(&mis->colo_incoming_sem);
 908     /* Must be called after failover BH is completed */
 909     if (mis->to_src_file) {
 910         qemu_fclose(mis->to_src_file);
 911         mis->to_src_file = NULL;
 912     }
 913
 914     rcu_unregister_thread();
 915     return NULL;
 916 }