migration/migration.h

   1 /*
   2  * QEMU live migration
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #ifndef QEMU_MIGRATION_H
  15 #define QEMU_MIGRATION_H
  16
  17 #include "exec/cpu-common.h"
  18 #include "hw/qdev-core.h"
  19 #include "qapi/qapi-types-migration.h"
  20 #include "qemu/thread.h"
  21 #include "qemu/coroutine_int.h"
  22 #include "io/channel.h"
  23 #include "io/channel-buffer.h"
  24 #include "net/announce.h"
  25 #include "qom/object.h"
  26
  27 struct PostcopyBlocktimeContext;
  28
     /*
      * Value associated with a migration "resume ack".
      * NOTE(review): presumably the value handed to
      * migrate_send_rp_resume_ack() -- confirm against its callers.
      */
  29 #define  MIGRATION_RESUME_ACK_VALUE  (1)
  30
  31 /*
  32  * 1<<6=64 pages -> 256K chunk when page size is 4K.  This gives us
  33  * the benefit that all the chunks are 64-page aligned, so the
  34  * bitmaps are always aligned to LONG.
  35  */
  36 #define CLEAR_BITMAP_SHIFT_MIN             6
  37 /*
  38  * 1<<18=256K pages -> 1G chunk when page size is 4K.  This is the
  39  * default value to use if none is specified.
  40  */
  41 #define CLEAR_BITMAP_SHIFT_DEFAULT        18
  42 /*
  43  * 1<<31=2G pages -> 8T chunk when page size is 4K.  This should be
  44  * big enough, while making sure we won't overflow easily.
  45  */
  46 #define CLEAR_BITMAP_SHIFT_MAX            31
  47
  48 /* This is an abstraction of a "temp huge page" for postcopy's purposes */
  49 typedef struct {
  50     /*
  51      * This points to a temporary huge page used as a buffer for UFFDIO_COPY.
  52      * It's mmap()ed and needs to be released on cleanup.
  53      */
  54     void *tmp_huge_page;
  55     /*
  56      * This points to the host page we're going to install for this temp page.
  57      * Once we've received the whole page, it tells us where to put it.
  58      */
  59     void *host_addr;
  60     /* Number of small pages copied (in units of TARGET_PAGE_SIZE) */
  61     unsigned int target_pages;
  62     /* Whether this page contains all zeros */
  63     bool all_zero;
  64 } PostcopyTmpPage;
  65
  66 /* State for the incoming migration */
  67 struct MigrationIncomingState {
         /* NOTE(review): presumably the stream read from the source -- confirm */
  68     QEMUFile *from_src_file;
  69     /* Previously received RAM's RAMBlock pointer */
  70     RAMBlock *last_recv_block;
  71     /*
          * A hook to allow cleanup at the end of incoming migration.
          * NOTE(review): presumably transport_cleanup is called with
          * transport_data as its argument -- confirm in the implementation.
          */
  72     void *transport_data;
  73     void (*transport_cleanup)(void *data);
  74     /*
  75      * Used to sync thread creations.  Note that we can't create threads in
  76      * parallel with this sem.
  77      */
  78     QemuSemaphore  thread_sync_sem;
  79     /*
  80      * Free at the start of the main state load, set as the main thread finishes
  81      * loading state.
  82      */
  83     QemuEvent main_thread_load_event;
  84
  85     /* For network announces */
  86     AnnounceTimer  announce_timer;
  87
  88     size_t         largest_page_size;
         /* NOTE(review): presumably true once fault_thread was created -- confirm */
  89     bool           have_fault_thread;
  90     QemuThread     fault_thread;
  91     /* Set this when we want the fault thread to quit */
  92     bool           fault_thread_quit;
  93
         /* NOTE(review): presumably true once listen_thread was created -- confirm */
  94     bool           have_listen_thread;
  95     QemuThread     listen_thread;
  96
  97     /* For the kernel to send us notifications */
  98     int       userfault_fd;
  99     /* To notify the fault_thread to wake up, e.g., when it needs to quit */
 100     int       userfault_event_fd;
         /* NOTE(review): presumably the return-path stream to the source -- confirm */
 101     QEMUFile *to_src_file;
 102     QemuMutex rp_mutex;    /* We send replies from multiple threads */
 103     /* RAMBlock of last request sent to source */
 104     RAMBlock *last_rb;
 105     /*
 106      * Number of postcopy channels including the default precopy channel, so
 107      * vanilla postcopy will only contain one channel which contains both
 108      * precopy and postcopy streams.
 109      *
 110      * This is calculated when the src requests to enable postcopy but before
 111      * it starts.  Its value can depend on e.g. whether postcopy preemption is
 112      * enabled.
 113      */
 114     unsigned int postcopy_channels;
 115     /*
 116      * An array of temp host huge pages to be used, one for each postcopy
 117      * channel.
 118      */
 119     PostcopyTmpPage *postcopy_tmp_pages;
 120     /* This is shared for all postcopy channels */
 121     void     *postcopy_tmp_zero_page;
 122     /* PostCopyFD's for external userfaultfds & handlers of shared memory */
 123     GArray   *postcopy_remote_fds;
 124
 125     QEMUBH *bh;
 126
         /*
          * NOTE(review): presumably a MigrationStatus value updated through
          * migrate_set_state(), which takes an int * -- confirm.
          */
 127     int state;
 128
 129     bool have_colo_incoming_thread;
 130     QemuThread colo_incoming_thread;
 131     /* The coroutine we should enter (back) after failover */
 132     Coroutine *migration_incoming_co;
 133     QemuSemaphore colo_incoming_sem;
 134
 135     /*
 136      * PostcopyBlocktimeContext to keep information for postcopy
 137      * live migration, to calculate vCPU block time
 138      */
 139     struct PostcopyBlocktimeContext *blocktime_ctx;
 140
 141     /* Notify PAUSED postcopy incoming migrations to try to continue */
 142     QemuSemaphore postcopy_pause_sem_dst;
 143     QemuSemaphore postcopy_pause_sem_fault;
 144
 145     /* List of listening socket addresses */
 146     SocketAddressList *socket_address_list;
 147
 148     /* A tree of pages that we requested from the source VM */
 149     GTree *page_requested;
 150     /* For debugging purposes only, but would be nice to keep */
 151     int page_requested_count;
 152     /*
 153      * The mutex helps to maintain the requested pages that we sent to the
 154      * source, IOW, to guarantee coherence between the page_requested tree and
 155      * the per-ramblock receivedmap.  Note! This does not guarantee consistency
 156      * of the real page copy procedures (using UFFDIO_[ZERO]COPY).  E.g., even
 157      * if one bit in receivedmap is cleared, UFFDIO_COPY could have happened
 158      * for that page already.  This is intended so that the mutex won't
 159      * serialize, or be blocked by, slow operations like UFFDIO_* ioctls.
 160      * However this should be enough to make sure the page_requested tree
 161      * always contains valid information.
 162      */
 163     QemuMutex page_request_mutex;
 164 };
 165
     /*
      * Access to and teardown of the incoming migration state.
      * NOTE(review): migration_incoming_get_current() and
      * migration_incoming_state_destroy() take no arguments, so they
      * presumably operate on a single global MigrationIncomingState --
      * confirm in migration.c.
      */
 166 MigrationIncomingState *migration_incoming_get_current(void);
 167 void migration_incoming_state_destroy(void);
 168 void migration_incoming_transport_cleanup(MigrationIncomingState *mis);
 169 /*
 170  * Functions to work with the blocktime context
 171  */
 172 void fill_destination_postcopy_migration_info(MigrationInfo *info);
 173
     /* QOM type name of the migration object */
 174 #define TYPE_MIGRATION "migration"
 175
     /*
      * NOTE(review): DECLARE_OBJ_CHECKERS is not defined in this file
      * (presumably it comes from qom/object.h); it is expected to declare the
      * MIGRATION_OBJ* cast/check helpers for MigrationState and
      * MigrationClass -- confirm the exact set of generated names there.
      */
 176 typedef struct MigrationClass MigrationClass;
 177 DECLARE_OBJ_CHECKERS(MigrationState, MigrationClass,
 178                      MIGRATION_OBJ, TYPE_MIGRATION)
 179
     /* Class structure for TYPE_MIGRATION; it only embeds its DeviceClass parent */
 180 struct MigrationClass {
 181     /*< private >*/
 182     DeviceClass parent_class;
 183 };
 184
     /*
      * Migration state object; its parent object is a DeviceState.
      * NOTE(review): the fields below (to_dst_file, the return path from the
      * destination, downtime accounting) suggest this is the state of the
      * outgoing side -- confirm.
      */
 185 struct MigrationState {
 186     /*< private >*/
 187     DeviceState parent_obj;
 188
 189     /*< public >*/
 190     QemuThread thread;
 191     QEMUBH *vm_start_bh;
 192     QEMUBH *cleanup_bh;
 193     /* Protected by qemu_file_lock */
 194     QEMUFile *to_dst_file;
 195     QIOChannelBuffer *bioc;
 196     /*
 197      * Protects to_dst_file/from_dst_file pointers.  We need to make sure we
 198      * won't yield or hang during the critical section, since this lock will be
 199      * used in OOB command handler.
 200      */
 201     QemuMutex qemu_file_lock;
 202
 203     /*
 204      * Used to allow urgent requests to override rate limiting.
 205      */
 206     QemuSemaphore rate_limit_sem;
 207
 208     /* pages already sent at the beginning of current iteration */
 209     uint64_t iteration_initial_pages;
 210
 211     /* pages transferred per second */
 212     double pages_per_second;
 213
 214     /* bytes already sent at the beginning of current iteration */
 215     uint64_t iteration_initial_bytes;
 216     /* time at the start of current iteration */
 217     int64_t iteration_start_time;
 218     /*
 219      * The final stage happens when the remaining data is smaller than
 220      * this threshold; it's calculated from the requested downtime and
 221      * measured bandwidth
 222      */
 223     int64_t threshold_size;
 224
 225     /* params from 'migrate-set-parameters' */
 226     MigrationParameters parameters;
 227
         /*
          * NOTE(review): presumably a MigrationStatus value updated through
          * migrate_set_state(), which takes an int * -- confirm.
          */
 228     int state;
 229
 230     /* State related to return path */
 231     struct {
 232         /* Protected by qemu_file_lock */
 233         QEMUFile     *from_dst_file;
 234         QemuThread    rp_thread;
 235         bool          error;
 236         /*
 237          * We can also check non-zero of rp_thread, but there's no "official"
 238          * way to do this, so this bool makes it slightly more elegant.
 239          * Checking from_dst_file for this is racy because from_dst_file will
 240          * be cleared in the rp_thread!
 241          */
 242         bool          rp_thread_created;
 243         QemuSemaphore rp_sem;
 244     } rp_state;
 245
 246     double mbps;
 247     /* Timestamp when recent migration starts (ms) */
 248     int64_t start_time;
 249     /* Total time used by latest migration (ms) */
 250     int64_t total_time;
 251     /* Timestamp when VM is down (ms) to migrate the last stuff */
 252     int64_t downtime_start;
 253     int64_t downtime;
 254     int64_t expected_downtime;
         /* One flag per migration capability, sized by MIGRATION_CAPABILITY__MAX */
 255     bool enabled_capabilities[MIGRATION_CAPABILITY__MAX];
 256     int64_t setup_time;
 257     /*
 258      * Whether guest was running when we enter the completion stage.
 259      * If migration is interrupted by any reason, we need to continue
 260      * running the guest on source.
 261      */
 262     bool vm_was_running;
 263
 264     /* Flag set once the migration has been asked to enter postcopy */
 265     bool start_postcopy;
 266     /* Flag set after postcopy has sent the device state */
 267     bool postcopy_after_devices;
 268
 269     /* Flag set once the migration thread is running (and needs joining) */
 270     bool migration_thread_running;
 271
 272     /* Flag set once the migration thread called bdrv_inactivate_all */
 273     bool block_inactive;
 274
 275     /* Migration is waiting for guest to unplug device */
 276     QemuSemaphore wait_unplug_sem;
 277
 278     /* Migration is paused due to pause-before-switchover */
 279     QemuSemaphore pause_sem;
 280
 281     /* The semaphore is used to notify COLO thread that failover is finished */
 282     QemuSemaphore colo_exit_sem;
 283
 284     /* The event is used to notify COLO thread to do checkpoint */
 285     QemuEvent colo_checkpoint_event;
 286     int64_t colo_checkpoint_time;
 287     QEMUTimer *colo_delay_timer;
 288
 289     /* The first error that has occurred.
 290        We use error_mutex to be able to return the first error message */
 291     Error *error;
 292     /* mutex to protect error */
 293     QemuMutex error_mutex;
 294
 295     /* Do we have to clean up -b/-i from old migrate parameters */
 296     /* This feature is deprecated and will be removed */
 297     bool must_remove_block_options;
 298
 299     /*
 300      * Global switch on whether we need to store the global state
 301      * during migration.
 302      */
 303     bool store_global_state;
 304
 305     /* Whether we send QEMU_VM_CONFIGURATION during migration */
 306     bool send_configuration;
 307     /* Whether we send section footer during migration */
 308     bool send_section_footer;
 309
 310     /* Needed by postcopy-pause state */
 311     QemuSemaphore postcopy_pause_sem;
 312     QemuSemaphore postcopy_pause_rp_sem;
 313     /*
 314      * Whether we abort the migration if decompression errors are
 315      * detected at the destination. It is left at false for qemu
 316      * older than 3.0, since only newer qemu sends streams that
 317      * do not trigger spurious decompression errors.
 318      */
 319     bool decompress_error_check;
 320
 321     /*
 322      * This decides the size of guest memory chunk that will be used
 323      * to track dirty bitmap clearing.  The size of memory chunk will
 324      * be GUEST_PAGE_SIZE << N.  Say, N=0 means we will clear dirty
 325      * bitmap for each page to send (1<<0=1); N=10 means we will clear
 326      * dirty bitmap only once for 1<<10=1K continuous guest pages
 327      * (which is a 4M chunk when page size is 4K).
          * NOTE(review): presumably bounded by CLEAR_BITMAP_SHIFT_MIN and
          * CLEAR_BITMAP_SHIFT_MAX -- confirm where the value is validated.
 328      */
 329     uint8_t clear_bitmap_shift;
 330
 331     /*
 332      * This saves the hostname when outgoing migration starts
 333      */
 334     char *hostname;
 335 };
 336
     /*
      * Core migration entry points.
      * NOTE(review): migrate_set_state() presumably moves *state from
      * old_state to new_state only when it currently equals old_state --
      * confirm in migration.c.
      */
 337 void migrate_set_state(int *state, int old_state, int new_state);
 338
     /* Incoming side: start processing from a QEMUFile or a QIOChannel */
 339 void migration_fd_process_incoming(QEMUFile *f, Error **errp);
 340 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
 341 void migration_incoming_process(void);
 342
 343 bool  migration_has_all_channels(void);
 344
 345 uint64_t migrate_max_downtime(void);
 346
     /* The Error passed to these two is const, so the caller keeps ownership */
 347 void migrate_set_error(MigrationState *s, const Error *error);
 348 void migrate_fd_error(MigrationState *s, const Error *error);
 349
 350 void migrate_fd_connect(MigrationState *s, Error *error_in);
 351
     /* Predicates on a migration state value passed in by the caller */
 352 bool migration_is_setup_or_active(int state);
 353 bool migration_is_running(int state);
 354
 355 void migrate_init(MigrationState *s);
 356 bool migration_is_blocked(Error **errp);
 357 /* True if outgoing migration has entered postcopy phase */
 358 bool migration_in_postcopy(void);
 359 MigrationState *migrate_get_current(void);
 360
     /*
      * Capability and parameter queries.  None of them takes an argument.
      * NOTE(review): presumably they read enabled_capabilities[] and
      * parameters of the current MigrationState -- confirm in migration.c.
      */
 361 bool migrate_postcopy(void);
 362
 363 bool migrate_release_ram(void);
 364 bool migrate_postcopy_ram(void);
 365 bool migrate_zero_blocks(void);
 366 bool migrate_dirty_bitmaps(void);
 367 bool migrate_ignore_shared(void);
 368 bool migrate_validate_uuid(void);
 369
 370 bool migrate_auto_converge(void);
 371 bool migrate_use_multifd(void);
 372 bool migrate_pause_before_switchover(void);
 373 int migrate_multifd_channels(void);
 374 MultiFDCompression migrate_multifd_compression(void);
 375 int migrate_multifd_zlib_level(void);
 376 int migrate_multifd_zstd_level(void);
 377
     /*
      * Note: migrate_use_xbzrle() returns int, unlike the other
      * migrate_use_*() queries declared here, which return bool.
      */
 378 int migrate_use_xbzrle(void);
 379 uint64_t migrate_xbzrle_cache_size(void);
 380 bool migrate_colo_enabled(void);
 381
 382 bool migrate_use_block(void);
 383 bool migrate_use_block_incremental(void);
 384 int migrate_max_cpu_throttle(void);
 385 bool migrate_use_return_path(void);
 386
 387 uint64_t ram_get_total_transferred_pages(void);
 388
 389 bool migrate_use_compression(void);
 390 int migrate_compress_level(void);
 391 int migrate_compress_threads(void);
 392 int migrate_compress_wait_thread(void);
 393 int migrate_decompress_threads(void);
 394 bool migrate_use_events(void);
 395 bool migrate_postcopy_blocktime(void);
 396 bool migrate_background_snapshot(void);
 397
 398 /*
      * Sending on the return path, one function per message type.  All of
      * them take the MigrationIncomingState.  Only the two page-request
      * senders return an int; the others return void.
      * NOTE(review): the int is presumably 0 on success and negative on
      * failure -- confirm in migration.c.
      */
 399 void migrate_send_rp_shut(MigrationIncomingState *mis,
 400                           uint32_t value);
 401 void migrate_send_rp_pong(MigrationIncomingState *mis,
 402                           uint32_t value);
 403 int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
 404                               ram_addr_t start, uint64_t haddr);
 405 int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
 406                                       RAMBlock *rb, ram_addr_t start);
 407 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
 408                                  char *block_name);
 409 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value);
 410
     /* Dirty bitmap migration hooks */
 411 void dirty_bitmap_mig_before_vm_start(void);
 412 void dirty_bitmap_mig_cancel_outgoing(void);
 413 void dirty_bitmap_mig_cancel_incoming(void);
     /*
      * NOTE(review): presumably returns true when the alias map is valid and
      * sets *errp otherwise -- confirm in the implementation.
      */
 414 bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm,
 415                                       Error **errp);
 416
 417 void migrate_add_address(SocketAddress *address);
 418
     /*
      * Calls func with opaque for RAM blocks.
      * NOTE(review): presumably skips blocks that migration ignores and stops
      * at the first non-zero return from func -- confirm in the implementation.
      */
 419 int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque);
 420
     /*
      * Poison qemu_ram_foreach_block for code that includes this header.
      * The "#warning" below is part of the macro's replacement list; a macro
      * expansion is never processed as a preprocessing directive, so any use
      * of qemu_ram_foreach_block expands to tokens that fail to compile and
      * the diagnostic shows the message pointing at the replacement.
      */
 421 #define qemu_ram_foreach_block \
 422   #warning "Use foreach_not_ignored_block in migration code"
 423
     /*
      * Urgent requests and rate limiting.
      * NOTE(review): presumably backed by MigrationState.rate_limit_sem,
      * which is documented as letting urgent requests override rate
      * limiting -- confirm in migration.c.
      */
 424 void migration_make_urgent_request(void);
 425 void migration_consume_urgent_request(void);
 426 bool migration_rate_limit(void);
 427 void migration_cancel(const Error *error);
 428
 429 void populate_vfio_info(MigrationInfo *info);
     /* NOTE(review): presumably resets the bookkeeping fields of tmp_page -- confirm */
 430 void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
 431
     /* Query / set whether the migration protocol in use allows multiple channels */
 432 bool migrate_multi_channels_is_allowed(void);
 433 void migrate_protocol_allow_multi_channels(bool allow);
 434
 435 #endif