migration/ram.c

   1 /*
   2  * QEMU System Emulator
   3  *
   4  * Copyright (c) 2003-2008 Fabrice Bellard
   5  * Copyright (c) 2011-2015 Red Hat Inc
   6  *
   7  * Authors:
   8  *  Juan Quintela <quintela@redhat.com>
   9  *
  10  * Permission is hereby granted, free of charge, to any person obtaining a copy
  11  * of this software and associated documentation files (the "Software"), to deal
  12  * in the Software without restriction, including without limitation the rights
  13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14  * copies of the Software, and to permit persons to whom the Software is
  15  * furnished to do so, subject to the following conditions:
  16  *
  17  * The above copyright notice and this permission notice shall be included in
  18  * all copies or substantial portions of the Software.
  19  *
  20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26  * THE SOFTWARE.
  27  */
  28 #include "qemu/osdep.h"
  29 #include "qemu-common.h"
  30 #include "cpu.h"
  31 #include <zlib.h>
  32 #include "qapi-event.h"
  33 #include "qemu/cutils.h"
  34 #include "qemu/bitops.h"
  35 #include "qemu/bitmap.h"
  36 #include "qemu/timer.h"
  37 #include "qemu/main-loop.h"
  38 #include "migration/migration.h"
  39 #include "migration/postcopy-ram.h"
  40 #include "exec/address-spaces.h"
  41 #include "migration/page_cache.h"
  42 #include "qemu/error-report.h"
  43 #include "trace.h"
  44 #include "exec/ram_addr.h"
  45 #include "qemu/rcu_queue.h"
  46 #include "migration/colo.h"
  47
  48 /***********************************************************/
  49 /* ram save/restore */
  50
/*
 * Flags OR'ed into the low bits of the 64-bit offset word that
 * save_page_header() puts on the wire.
 */
#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02 /* used by save_zero_page() for an all-zero page */
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08 /* a raw TARGET_PAGE_SIZE page follows */
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20 /* same block as the last header: no idstr follows */
#define RAM_SAVE_FLAG_XBZRLE   0x40 /* an XBZRLE-encoded page follows */
/* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100 /* used by do_compress_ram_page() */
  60
/*
 * Page buffer handed to cache_insert() by xbzrle_cache_zero_page() as the
 * content of a zero page.  NOTE(review): allocation and size are not visible
 * in this part of the file - presumably TARGET_PAGE_SIZE zero bytes; confirm.
 */
static uint8_t *ZERO_TARGET_PAGE;
  62
  63 static inline bool is_zero_range(uint8_t *p, uint64_t size)
  64 {
  65     return buffer_is_zero(p, size);
  66 }
  67
/*
 * Source-side XBZRLE state: the page cache plus the scratch buffers used
 * by save_xbzrle_page() while encoding a page.
 */
static struct {
    /* buffer used for XBZRLE encoding (receives the encoded output) */
    uint8_t *encoded_buf;
    /* buffer for storing page content (snapshot of the page being encoded) */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    /* taken through XBZRLE_cache_lock()/XBZRLE_cache_unlock() */
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
  82
  83 static void XBZRLE_cache_lock(void)
  84 {
  85     if (migrate_use_xbzrle())
  86         qemu_mutex_lock(&XBZRLE.lock);
  87 }
  88
  89 static void XBZRLE_cache_unlock(void)
  90 {
  91     if (migrate_use_xbzrle())
  92         qemu_mutex_unlock(&XBZRLE.lock);
  93 }
  94
  95 /**
  96  * xbzrle_cache_resize: resize the xbzrle cache
  97  *
  98  * This function is called from qmp_migrate_set_cache_size in main
  99  * thread, possibly while a migration is in progress.  A running
 100  * migration may be using the cache and might finish during this call,
 101  * hence changes to the cache are protected by XBZRLE.lock().
 102  *
 103  * Returns the new_size or negative in case of error.
 104  *
 105  * @new_size: new cache size
 106  */
 107 int64_t xbzrle_cache_resize(int64_t new_size)
 108 {
 109     PageCache *new_cache;
 110     int64_t ret;
 111
 112     if (new_size < TARGET_PAGE_SIZE) {
 113         return -1;
 114     }
 115
 116     XBZRLE_cache_lock();
 117
 118     if (XBZRLE.cache != NULL) {
 119         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
 120             goto out_new_size;
 121         }
 122         new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
 123                                         TARGET_PAGE_SIZE);
 124         if (!new_cache) {
 125             error_report("Error creating cache");
 126             ret = -1;
 127             goto out;
 128         }
 129
 130         cache_fini(XBZRLE.cache);
 131         XBZRLE.cache = new_cache;
 132     }
 133
 134 out_new_size:
 135     ret = pow2floor(new_size);
 136 out:
 137     XBZRLE_cache_unlock();
 138     return ret;
 139 }
 140
/*
 * Dirty-page bitmaps used by RAM migration.  Reached through
 * RAMState.ram_bitmap with atomic_rcu_read().
 */
struct RAMBitmap {
    /* RCU bookkeeping head */
    struct rcu_head rcu;
    /* Main migration bitmap: one bit per target page, indexed by
     * (block->offset >> TARGET_PAGE_BITS) + page within the block
     */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
};
typedef struct RAMBitmap RAMBitmap;
 153
/*
 * An outstanding page request, on the source, having been received
 * and queued.  Entries are linked into RAMState.src_page_requests.
 */
struct RAMSrcPageRequest {
    /* block the requested range belongs to */
    RAMBlock *rb;
    /* start of the requested range - presumably relative to @rb; confirm */
    hwaddr    offset;
    /* length of the requested range */
    hwaddr    len;

    /* queue linkage */
    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
 165
/* State of RAM for migration (source side); see the ram_state singleton */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data; lets save_page_header()
     * omit the block name by setting RAM_SAVE_FLAG_CONTINUE */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have seen too many pages being dirtied */
    int dirty_rate_high_cnt;
    /* How many times we have synchronized the bitmap */
    uint64_t bitmap_sync_count;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Accounting fields */
    /* number of zero pages.  It used to be pages filled by the same char. */
    uint64_t zero_pages;
    /* number of normal transferred pages */
    uint64_t norm_pages;
    /* Iterations since start */
    uint64_t iterations;
    /* xbzrle transmitted bytes.  Notice that this is with
     * compression, they can't be calculated from the pages */
    uint64_t xbzrle_bytes;
    /* xbzrle transmitted pages */
    uint64_t xbzrle_pages;
    /* xbzrle number of cache misses */
    uint64_t xbzrle_cache_miss;
    /* xbzrle miss rate */
    double xbzrle_cache_miss_rate;
    /* xbzrle number of overflows */
    uint64_t xbzrle_overflows;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* total number of bytes transferred */
    uint64_t bytes_transferred;
    /* number of dirtied pages in the last second */
    uint64_t dirty_pages_rate;
    /* Count of requests incoming from destination */
    uint64_t postcopy_requests;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* Ram Bitmap protected by RCU */
    RAMBitmap *ram_bitmap;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

/* The single RAM migration state instance used throughout this file */
static RAMState ram_state;
 234
 235 uint64_t dup_mig_pages_transferred(void)
 236 {
 237     return ram_state.zero_pages;
 238 }
 239
 240 uint64_t norm_mig_pages_transferred(void)
 241 {
 242     return ram_state.norm_pages;
 243 }
 244
 245 uint64_t xbzrle_mig_bytes_transferred(void)
 246 {
 247     return ram_state.xbzrle_bytes;
 248 }
 249
 250 uint64_t xbzrle_mig_pages_transferred(void)
 251 {
 252     return ram_state.xbzrle_pages;
 253 }
 254
 255 uint64_t xbzrle_mig_pages_cache_miss(void)
 256 {
 257     return ram_state.xbzrle_cache_miss;
 258 }
 259
 260 double xbzrle_mig_cache_miss_rate(void)
 261 {
 262     return ram_state.xbzrle_cache_miss_rate;
 263 }
 264
 265 uint64_t xbzrle_mig_pages_overflow(void)
 266 {
 267     return ram_state.xbzrle_overflows;
 268 }
 269
 270 uint64_t ram_bytes_transferred(void)
 271 {
 272     return ram_state.bytes_transferred;
 273 }
 274
 275 uint64_t ram_bytes_remaining(void)
 276 {
 277     return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
 278 }
 279
 280 uint64_t ram_dirty_sync_count(void)
 281 {
 282     return ram_state.bitmap_sync_count;
 283 }
 284
 285 uint64_t ram_dirty_pages_rate(void)
 286 {
 287     return ram_state.dirty_pages_rate;
 288 }
 289
 290 uint64_t ram_postcopy_requests(void)
 291 {
 292     return ram_state.postcopy_requests;
 293 }
 294
/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current page to search from; a target-page index within @block
     * (ram_save_page() converts it with << TARGET_PAGE_BITS) */
    unsigned long page;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
 305
/* Per-thread state of one compression worker (see do_data_compress()) */
struct CompressParam {
    /* true when the worker has no page in flight; set by the worker under
     * comp_done_lock once a page has been compressed */
    bool done;
    /* asks the worker to leave its loop; set under @mutex */
    bool quit;
    /* buffer-only QEMUFile (empty ops) receiving the compressed output */
    QEMUFile *file;
    /* @mutex/@cond protect and signal @block, @offset and @quit */
    QemuMutex mutex;
    QemuCond cond;
    /* page to compress next; a non-NULL @block means work is pending */
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;
 316
/*
 * Per-thread state of one decompression worker.
 * NOTE(review): the code using it is not visible in this part of the file;
 * the field notes below are inferred from the names and from CompressParam -
 * confirm against the decompression code.
 */
struct DecompressParam {
    /* presumably: worker is idle / finished its buffer */
    bool done;
    /* presumably: asks the worker to exit */
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    /* presumably: destination of the decompressed data */
    void *des;
    /* presumably: compressed input and its length */
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;
 327
/* Arrays with one entry per compression thread; allocated in
 * migrate_compress_threads_create(), freed in migrate_compress_threads_join()
 */
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

/* Decompression counterparts of the objects above (users not visible here) */
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

/* Defined further down; needed by do_data_compress() */
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);
 346
 347 static void *do_data_compress(void *opaque)
 348 {
 349     CompressParam *param = opaque;
 350     RAMBlock *block;
 351     ram_addr_t offset;
 352
 353     qemu_mutex_lock(&param->mutex);
 354     while (!param->quit) {
 355         if (param->block) {
 356             block = param->block;
 357             offset = param->offset;
 358             param->block = NULL;
 359             qemu_mutex_unlock(&param->mutex);
 360
 361             do_compress_ram_page(param->file, block, offset);
 362
 363             qemu_mutex_lock(&comp_done_lock);
 364             param->done = true;
 365             qemu_cond_signal(&comp_done_cond);
 366             qemu_mutex_unlock(&comp_done_lock);
 367
 368             qemu_mutex_lock(&param->mutex);
 369         } else {
 370             qemu_cond_wait(&param->cond, &param->mutex);
 371         }
 372     }
 373     qemu_mutex_unlock(&param->mutex);
 374
 375     return NULL;
 376 }
 377
 378 static inline void terminate_compression_threads(void)
 379 {
 380     int idx, thread_count;
 381
 382     thread_count = migrate_compress_threads();
 383
 384     for (idx = 0; idx < thread_count; idx++) {
 385         qemu_mutex_lock(&comp_param[idx].mutex);
 386         comp_param[idx].quit = true;
 387         qemu_cond_signal(&comp_param[idx].cond);
 388         qemu_mutex_unlock(&comp_param[idx].mutex);
 389     }
 390 }
 391
 392 void migrate_compress_threads_join(void)
 393 {
 394     int i, thread_count;
 395
 396     if (!migrate_use_compression()) {
 397         return;
 398     }
 399     terminate_compression_threads();
 400     thread_count = migrate_compress_threads();
 401     for (i = 0; i < thread_count; i++) {
 402         qemu_thread_join(compress_threads + i);
 403         qemu_fclose(comp_param[i].file);
 404         qemu_mutex_destroy(&comp_param[i].mutex);
 405         qemu_cond_destroy(&comp_param[i].cond);
 406     }
 407     qemu_mutex_destroy(&comp_done_lock);
 408     qemu_cond_destroy(&comp_done_cond);
 409     g_free(compress_threads);
 410     g_free(comp_param);
 411     compress_threads = NULL;
 412     comp_param = NULL;
 413 }
 414
 415 void migrate_compress_threads_create(void)
 416 {
 417     int i, thread_count;
 418
 419     if (!migrate_use_compression()) {
 420         return;
 421     }
 422     thread_count = migrate_compress_threads();
 423     compress_threads = g_new0(QemuThread, thread_count);
 424     comp_param = g_new0(CompressParam, thread_count);
 425     qemu_cond_init(&comp_done_cond);
 426     qemu_mutex_init(&comp_done_lock);
 427     for (i = 0; i < thread_count; i++) {
 428         /* comp_param[i].file is just used as a dummy buffer to save data,
 429          * set its ops to empty.
 430          */
 431         comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
 432         comp_param[i].done = true;
 433         comp_param[i].quit = false;
 434         qemu_mutex_init(&comp_param[i].mutex);
 435         qemu_cond_init(&comp_param[i].cond);
 436         qemu_thread_create(compress_threads + i, "compress",
 437                            do_data_compress, comp_param + i,
 438                            QEMU_THREAD_JOINABLE);
 439     }
 440 }
 441
 442 /**
 443  * save_page_header: write page header to wire
 444  *
 445  * If this is the 1st block, it also writes the block identification
 446  *
 447  * Returns the number of bytes written
 448  *
 449  * @f: QEMUFile where to send the data
 450  * @block: block that contains the page we want to send
 451  * @offset: offset inside the block for the page
 452  *          in the lower bits, it contains flags
 453  */
 454 static size_t save_page_header(RAMState *rs, RAMBlock *block, ram_addr_t offset)
 455 {
 456     size_t size, len;
 457
 458     if (block == rs->last_sent_block) {
 459         offset |= RAM_SAVE_FLAG_CONTINUE;
 460     }
 461     qemu_put_be64(rs->f, offset);
 462     size = 8;
 463
 464     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
 465         len = strlen(block->idstr);
 466         qemu_put_byte(rs->f, len);
 467         qemu_put_buffer(rs->f, (uint8_t *)block->idstr, len);
 468         size += 1 + len;
 469         rs->last_sent_block = block;
 470     }
 471     return size;
 472 }
 473
 474 /**
 475  * mig_throttle_guest_down: throotle down the guest
 476  *
 477  * Reduce amount of guest cpu execution to hopefully slow down memory
 478  * writes. If guest dirty memory rate is reduced below the rate at
 479  * which we can transfer pages to the destination then we should be
 480  * able to complete migration. Some workloads dirty memory way too
 481  * fast and will not effectively converge, even with auto-converge.
 482  */
 483 static void mig_throttle_guest_down(void)
 484 {
 485     MigrationState *s = migrate_get_current();
 486     uint64_t pct_initial = s->parameters.cpu_throttle_initial;
 487     uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
 488
 489     /* We have not started throttling yet. Let's start it. */
 490     if (!cpu_throttle_active()) {
 491         cpu_throttle_set(pct_initial);
 492     } else {
 493         /* Throttling already on, just increase the rate */
 494         cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
 495     }
 496 }
 497
 498 /**
 499  * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 500  *
 501  * @rs: current RAM state
 502  * @current_addr: address for the zero page
 503  *
 504  * Update the xbzrle cache to reflect a page that's been sent as all 0.
 505  * The important thing is that a stale (not-yet-0'd) page be replaced
 506  * by the new data.
 507  * As a bonus, if the page wasn't in the cache it gets added so that
 508  * when a small write is made into the 0'd page it gets XBZRLE sent.
 509  */
 510 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
 511 {
 512     if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
 513         return;
 514     }
 515
 516     /* We don't care if this fails to allocate a new cache page
 517      * as long as it updated an old one */
 518     cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
 519                  rs->bitmap_sync_count);
 520 }
 521
/* Encoding byte that save_xbzrle_page() writes right after the page header */
#define ENCODING_FLAG_XBZRLE 0x1
 523
 524 /**
 525  * save_xbzrle_page: compress and send current page
 526  *
 527  * Returns: 1 means that we wrote the page
 528  *          0 means that page is identical to the one already sent
 529  *          -1 means that xbzrle would be longer than normal
 530  *
 531  * @rs: current RAM state
 532  * @current_data: pointer to the address of the page contents
 533  * @current_addr: addr of the page
 534  * @block: block that contains the page we want to send
 535  * @offset: offset inside the block for the page
 536  * @last_stage: if we are at the completion stage
 537  */
 538 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
 539                             ram_addr_t current_addr, RAMBlock *block,
 540                             ram_addr_t offset, bool last_stage)
 541 {
 542     int encoded_len = 0, bytes_xbzrle;
 543     uint8_t *prev_cached_page;
 544
 545     if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
 546         rs->xbzrle_cache_miss++;
 547         if (!last_stage) {
 548             if (cache_insert(XBZRLE.cache, current_addr, *current_data,
 549                              rs->bitmap_sync_count) == -1) {
 550                 return -1;
 551             } else {
 552                 /* update *current_data when the page has been
 553                    inserted into cache */
 554                 *current_data = get_cached_data(XBZRLE.cache, current_addr);
 555             }
 556         }
 557         return -1;
 558     }
 559
 560     prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
 561
 562     /* save current buffer into memory */
 563     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 564
 565     /* XBZRLE encoding (if there is no overflow) */
 566     encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
 567                                        TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
 568                                        TARGET_PAGE_SIZE);
 569     if (encoded_len == 0) {
 570         trace_save_xbzrle_page_skipping();
 571         return 0;
 572     } else if (encoded_len == -1) {
 573         trace_save_xbzrle_page_overflow();
 574         rs->xbzrle_overflows++;
 575         /* update data in the cache */
 576         if (!last_stage) {
 577             memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
 578             *current_data = prev_cached_page;
 579         }
 580         return -1;
 581     }
 582
 583     /* we need to update the data in the cache, in order to get the same data */
 584     if (!last_stage) {
 585         memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
 586     }
 587
 588     /* Send XBZRLE based compressed page */
 589     bytes_xbzrle = save_page_header(rs, block,
 590                                     offset | RAM_SAVE_FLAG_XBZRLE);
 591     qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
 592     qemu_put_be16(rs->f, encoded_len);
 593     qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
 594     bytes_xbzrle += encoded_len + 1 + 2;
 595     rs->xbzrle_pages++;
 596     rs->xbzrle_bytes += bytes_xbzrle;
 597     rs->bytes_transferred += bytes_xbzrle;
 598
 599     return 1;
 600 }
 601
 602 /**
 603  * migration_bitmap_find_dirty: find the next dirty page from start
 604  *
 605  * Called with rcu_read_lock() to protect migration_bitmap
 606  *
 607  * Returns the byte offset within memory region of the start of a dirty page
 608  *
 609  * @rs: current RAM state
 610  * @rb: RAMBlock where to search for dirty pages
 611  * @start: page where we start the search
 612  */
 613 static inline
 614 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
 615                                           unsigned long start)
 616 {
 617     unsigned long base = rb->offset >> TARGET_PAGE_BITS;
 618     unsigned long nr = base + start;
 619     uint64_t rb_size = rb->used_length;
 620     unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
 621     unsigned long *bitmap;
 622
 623     unsigned long next;
 624
 625     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
 626     if (rs->ram_bulk_stage && nr > base) {
 627         next = nr + 1;
 628     } else {
 629         next = find_next_bit(bitmap, size, nr);
 630     }
 631
 632     return next - base;
 633 }
 634
 635 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
 636                                                 RAMBlock *rb,
 637                                                 unsigned long page)
 638 {
 639     bool ret;
 640     unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
 641     unsigned long nr = (rb->offset >> TARGET_PAGE_BITS) + page;
 642
 643     ret = test_and_clear_bit(nr, bitmap);
 644
 645     if (ret) {
 646         rs->migration_dirty_pages--;
 647     }
 648     return ret;
 649 }
 650
 651 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
 652                                         ram_addr_t start, ram_addr_t length)
 653 {
 654     unsigned long *bitmap;
 655     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
 656     rs->migration_dirty_pages +=
 657         cpu_physical_memory_sync_dirty_bitmap(bitmap, rb, start, length,
 658                                               &rs->num_dirty_pages_period);
 659 }
 660
 661 /**
 662  * ram_pagesize_summary: calculate all the pagesizes of a VM
 663  *
 664  * Returns a summary bitmap of the page sizes of all RAMBlocks
 665  *
 666  * For VMs with just normal pages this is equivalent to the host page
 667  * size. If it's got some huge pages then it's the OR of all the
 668  * different page sizes.
 669  */
 670 uint64_t ram_pagesize_summary(void)
 671 {
 672     RAMBlock *block;
 673     uint64_t summary = 0;
 674
 675     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 676         summary |= block->page_size;
 677     }
 678
 679     return summary;
 680 }
 681
 682 static void migration_bitmap_sync(RAMState *rs)
 683 {
 684     RAMBlock *block;
 685     int64_t end_time;
 686     uint64_t bytes_xfer_now;
 687
 688     rs->bitmap_sync_count++;
 689
 690     if (!rs->bytes_xfer_prev) {
 691         rs->bytes_xfer_prev = ram_bytes_transferred();
 692     }
 693
 694     if (!rs->time_last_bitmap_sync) {
 695         rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 696     }
 697
 698     trace_migration_bitmap_sync_start();
 699     memory_global_dirty_log_sync();
 700
 701     qemu_mutex_lock(&rs->bitmap_mutex);
 702     rcu_read_lock();
 703     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 704         migration_bitmap_sync_range(rs, block, 0, block->used_length);
 705     }
 706     rcu_read_unlock();
 707     qemu_mutex_unlock(&rs->bitmap_mutex);
 708
 709     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
 710
 711     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 712
 713     /* more than 1 second = 1000 millisecons */
 714     if (end_time > rs->time_last_bitmap_sync + 1000) {
 715         if (migrate_auto_converge()) {
 716             /* The following detection logic can be refined later. For now:
 717                Check to see if the dirtied bytes is 50% more than the approx.
 718                amount of bytes that just got transferred since the last time we
 719                were in this routine. If that happens twice, start or increase
 720                throttling */
 721             bytes_xfer_now = ram_bytes_transferred();
 722
 723             if (rs->dirty_pages_rate &&
 724                (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
 725                    (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
 726                (rs->dirty_rate_high_cnt++ >= 2)) {
 727                     trace_migration_throttle();
 728                     rs->dirty_rate_high_cnt = 0;
 729                     mig_throttle_guest_down();
 730              }
 731              rs->bytes_xfer_prev = bytes_xfer_now;
 732         }
 733
 734         if (migrate_use_xbzrle()) {
 735             if (rs->iterations_prev != rs->iterations) {
 736                 rs->xbzrle_cache_miss_rate =
 737                    (double)(rs->xbzrle_cache_miss -
 738                             rs->xbzrle_cache_miss_prev) /
 739                    (rs->iterations - rs->iterations_prev);
 740             }
 741             rs->iterations_prev = rs->iterations;
 742             rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
 743         }
 744         rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000
 745             / (end_time - rs->time_last_bitmap_sync);
 746         rs->time_last_bitmap_sync = end_time;
 747         rs->num_dirty_pages_period = 0;
 748     }
 749     if (migrate_use_events()) {
 750         qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
 751     }
 752 }
 753
 754 /**
 755  * save_zero_page: send the zero page to the stream
 756  *
 757  * Returns the number of pages written.
 758  *
 759  * @rs: current RAM state
 760  * @block: block that contains the page we want to send
 761  * @offset: offset inside the block for the page
 762  * @p: pointer to the page
 763  */
 764 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
 765                           uint8_t *p)
 766 {
 767     int pages = -1;
 768
 769     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
 770         rs->zero_pages++;
 771         rs->bytes_transferred +=
 772             save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS);
 773         qemu_put_byte(rs->f, 0);
 774         rs->bytes_transferred += 1;
 775         pages = 1;
 776     }
 777
 778     return pages;
 779 }
 780
 781 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
 782 {
 783     if (!migrate_release_ram() || !migration_in_postcopy()) {
 784         return;
 785     }
 786
 787     ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
 788 }
 789
 790 /**
 791  * ram_save_page: send the given page to the stream
 792  *
 793  * Returns the number of pages written.
 794  *          < 0 - error
 795  *          >=0 - Number of pages written - this might legally be 0
 796  *                if xbzrle noticed the page was the same.
 797  *
 798  * @rs: current RAM state
 799  * @block: block that contains the page we want to send
 800  * @offset: offset inside the block for the page
 801  * @last_stage: if we are at the completion stage
 802  */
 803 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
 804 {
 805     int pages = -1;
 806     uint64_t bytes_xmit;
 807     ram_addr_t current_addr;
 808     uint8_t *p;
 809     int ret;
 810     bool send_async = true;
 811     RAMBlock *block = pss->block;
 812     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
 813
 814     p = block->host + offset;
 815
 816     /* In doubt sent page as normal */
 817     bytes_xmit = 0;
 818     ret = ram_control_save_page(rs->f, block->offset,
 819                            offset, TARGET_PAGE_SIZE, &bytes_xmit);
 820     if (bytes_xmit) {
 821         rs->bytes_transferred += bytes_xmit;
 822         pages = 1;
 823     }
 824
 825     XBZRLE_cache_lock();
 826
 827     current_addr = block->offset + offset;
 828
 829     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 830         if (ret != RAM_SAVE_CONTROL_DELAYED) {
 831             if (bytes_xmit > 0) {
 832                 rs->norm_pages++;
 833             } else if (bytes_xmit == 0) {
 834                 rs->zero_pages++;
 835             }
 836         }
 837     } else {
 838         pages = save_zero_page(rs, block, offset, p);
 839         if (pages > 0) {
 840             /* Must let xbzrle know, otherwise a previous (now 0'd) cached
 841              * page would be stale
 842              */
 843             xbzrle_cache_zero_page(rs, current_addr);
 844             ram_release_pages(block->idstr, offset, pages);
 845         } else if (!rs->ram_bulk_stage &&
 846                    !migration_in_postcopy() && migrate_use_xbzrle()) {
 847             pages = save_xbzrle_page(rs, &p, current_addr, block,
 848                                      offset, last_stage);
 849             if (!last_stage) {
 850                 /* Can't send this cached data async, since the cache page
 851                  * might get updated before it gets to the wire
 852                  */
 853                 send_async = false;
 854             }
 855         }
 856     }
 857
 858     /* XBZRLE overflow or normal page */
 859     if (pages == -1) {
 860         rs->bytes_transferred += save_page_header(rs, block,
 861                                                   offset | RAM_SAVE_FLAG_PAGE);
 862         if (send_async) {
 863             qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
 864                                   migrate_release_ram() &
 865                                   migration_in_postcopy());
 866         } else {
 867             qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
 868         }
 869         rs->bytes_transferred += TARGET_PAGE_SIZE;
 870         pages = 1;
 871         rs->norm_pages++;
 872     }
 873
 874     XBZRLE_cache_unlock();
 875
 876     return pages;
 877 }
 878
 879 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
 880                                 ram_addr_t offset)
 881 {
 882     RAMState *rs = &ram_state;
 883     int bytes_sent, blen;
 884     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
 885
 886     bytes_sent = save_page_header(rs, block, offset |
 887                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
 888     blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
 889                                      migrate_compress_level());
 890     if (blen < 0) {
 891         bytes_sent = 0;
 892         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
 893         error_report("compressed data failed!");
 894     } else {
 895         bytes_sent += blen;
 896         ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
 897     }
 898
 899     return bytes_sent;
 900 }
 901
 902 static void flush_compressed_data(RAMState *rs)
 903 {
 904     int idx, len, thread_count;
 905
 906     if (!migrate_use_compression()) {
 907         return;
 908     }
 909     thread_count = migrate_compress_threads();
 910
 911     qemu_mutex_lock(&comp_done_lock);
 912     for (idx = 0; idx < thread_count; idx++) {
 913         while (!comp_param[idx].done) {
 914             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
 915         }
 916     }
 917     qemu_mutex_unlock(&comp_done_lock);
 918
 919     for (idx = 0; idx < thread_count; idx++) {
 920         qemu_mutex_lock(&comp_param[idx].mutex);
 921         if (!comp_param[idx].quit) {
 922             len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
 923             rs->bytes_transferred += len;
 924         }
 925         qemu_mutex_unlock(&comp_param[idx].mutex);
 926     }
 927 }
 928
 929 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
 930                                        ram_addr_t offset)
 931 {
 932     param->block = block;
 933     param->offset = offset;
 934 }
 935
 936 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
 937                                            ram_addr_t offset)
 938 {
 939     int idx, thread_count, bytes_xmit = -1, pages = -1;
 940
 941     thread_count = migrate_compress_threads();
 942     qemu_mutex_lock(&comp_done_lock);
 943     while (true) {
 944         for (idx = 0; idx < thread_count; idx++) {
 945             if (comp_param[idx].done) {
 946                 comp_param[idx].done = false;
 947                 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
 948                 qemu_mutex_lock(&comp_param[idx].mutex);
 949                 set_compress_params(&comp_param[idx], block, offset);
 950                 qemu_cond_signal(&comp_param[idx].cond);
 951                 qemu_mutex_unlock(&comp_param[idx].mutex);
 952                 pages = 1;
 953                 rs->norm_pages++;
 954                 rs->bytes_transferred += bytes_xmit;
 955                 break;
 956             }
 957         }
 958         if (pages > 0) {
 959             break;
 960         } else {
 961             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
 962         }
 963     }
 964     qemu_mutex_unlock(&comp_done_lock);
 965
 966     return pages;
 967 }
 968
 969 /**
 970  * ram_save_compressed_page: compress the given page and send it to the stream
 971  *
 972  * Returns the number of pages written.
 973  *
 974  * @rs: current RAM state
 975  * @block: block that contains the page we want to send
 976  * @offset: offset inside the block for the page
 977  * @last_stage: if we are at the completion stage
 978  */
 979 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
 980                                     bool last_stage)
 981 {
 982     int pages = -1;
 983     uint64_t bytes_xmit = 0;
 984     uint8_t *p;
 985     int ret, blen;
 986     RAMBlock *block = pss->block;
 987     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
 988
 989     p = block->host + offset;
 990
 991     ret = ram_control_save_page(rs->f, block->offset,
 992                                 offset, TARGET_PAGE_SIZE, &bytes_xmit);
 993     if (bytes_xmit) {
 994         rs->bytes_transferred += bytes_xmit;
 995         pages = 1;
 996     }
 997     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 998         if (ret != RAM_SAVE_CONTROL_DELAYED) {
 999             if (bytes_xmit > 0) {
1000                 rs->norm_pages++;
1001             } else if (bytes_xmit == 0) {
1002                 rs->zero_pages++;
1003             }
1004         }
1005     } else {
1006         /* When starting the process of a new block, the first page of
1007          * the block should be sent out before other pages in the same
1008          * block, and all the pages in last block should have been sent
1009          * out, keeping this order is important, because the 'cont' flag
1010          * is used to avoid resending the block name.
1011          */
1012         if (block != rs->last_sent_block) {
1013             flush_compressed_data(rs);
1014             pages = save_zero_page(rs, block, offset, p);
1015             if (pages == -1) {
1016                 /* Make sure the first page is sent out before other pages */
1017                 bytes_xmit = save_page_header(rs, block, offset |
1018                                               RAM_SAVE_FLAG_COMPRESS_PAGE);
1019                 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
1020                                                  migrate_compress_level());
1021                 if (blen > 0) {
1022                     rs->bytes_transferred += bytes_xmit + blen;
1023                     rs->norm_pages++;
1024                     pages = 1;
1025                 } else {
1026                     qemu_file_set_error(rs->f, blen);
1027                     error_report("compressed data failed!");
1028                 }
1029             }
1030             if (pages > 0) {
1031                 ram_release_pages(block->idstr, offset, pages);
1032             }
1033         } else {
1034             pages = save_zero_page(rs, block, offset, p);
1035             if (pages == -1) {
1036                 pages = compress_page_with_multi_thread(rs, block, offset);
1037             } else {
1038                 ram_release_pages(block->idstr, offset, pages);
1039             }
1040         }
1041     }
1042
1043     return pages;
1044 }
1045
1046 /**
1047  * find_dirty_block: find the next dirty page and update any state
1048  * associated with the search process.
1049  *
1050  * Returns if a page is found
1051  *
1052  * @rs: current RAM state
1053  * @pss: data about the state of the current dirty page scan
1054  * @again: set to false if the search has scanned the whole of RAM
1055  */
1056 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1057 {
1058     pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1059     if (pss->complete_round && pss->block == rs->last_seen_block &&
1060         pss->page >= rs->last_page) {
1061         /*
1062          * We've been once around the RAM and haven't found anything.
1063          * Give up.
1064          */
1065         *again = false;
1066         return false;
1067     }
1068     if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1069         /* Didn't find anything in this RAM Block */
1070         pss->page = 0;
1071         pss->block = QLIST_NEXT_RCU(pss->block, next);
1072         if (!pss->block) {
1073             /* Hit the end of the list */
1074             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1075             /* Flag that we've looped */
1076             pss->complete_round = true;
1077             rs->ram_bulk_stage = false;
1078             if (migrate_use_xbzrle()) {
1079                 /* If xbzrle is on, stop using the data compression at this
1080                  * point. In theory, xbzrle can do better than compression.
1081                  */
1082                 flush_compressed_data(rs);
1083             }
1084         }
1085         /* Didn't find anything this time, but try again on the new block */
1086         *again = true;
1087         return false;
1088     } else {
1089         /* Can go around again, but... */
1090         *again = true;
1091         /* We've found something so probably don't need to */
1092         return true;
1093     }
1094 }
1095
1096 /**
1097  * unqueue_page: gets a page of the queue
1098  *
1099  * Helper for 'get_queued_page' - gets a page off the queue
1100  *
1101  * Returns the block of the page (or NULL if none available)
1102  *
1103  * @rs: current RAM state
1104  * @offset: used to return the offset within the RAMBlock
1105  */
1106 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1107 {
1108     RAMBlock *block = NULL;
1109
1110     qemu_mutex_lock(&rs->src_page_req_mutex);
1111     if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1112         struct RAMSrcPageRequest *entry =
1113                                 QSIMPLEQ_FIRST(&rs->src_page_requests);
1114         block = entry->rb;
1115         *offset = entry->offset;
1116
1117         if (entry->len > TARGET_PAGE_SIZE) {
1118             entry->len -= TARGET_PAGE_SIZE;
1119             entry->offset += TARGET_PAGE_SIZE;
1120         } else {
1121             memory_region_unref(block->mr);
1122             QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1123             g_free(entry);
1124         }
1125     }
1126     qemu_mutex_unlock(&rs->src_page_req_mutex);
1127
1128     return block;
1129 }
1130
1131 /**
1132  * get_queued_page: unqueue a page from the postocpy requests
1133  *
1134  * Skips pages that are already sent (!dirty)
1135  *
1136  * Returns if a queued page is found
1137  *
1138  * @rs: current RAM state
1139  * @pss: data about the state of the current dirty page scan
1140  */
1141 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1142 {
1143     RAMBlock  *block;
1144     ram_addr_t offset;
1145     bool dirty;
1146
1147     do {
1148         block = unqueue_page(rs, &offset);
1149         /*
1150          * We're sending this page, and since it's postcopy nothing else
1151          * will dirty it, and we must make sure it doesn't get sent again
1152          * even if this queue request was received after the background
1153          * search already sent it.
1154          */
1155         if (block) {
1156             unsigned long *bitmap;
1157             unsigned long page;
1158
1159             bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1160             page = (block->offset + offset) >> TARGET_PAGE_BITS;
1161             dirty = test_bit(page, bitmap);
1162             if (!dirty) {
1163                 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1164                     page,
1165                     test_bit(page,
1166                              atomic_rcu_read(&rs->ram_bitmap)->unsentmap));
1167             } else {
1168                 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1169             }
1170         }
1171
1172     } while (block && !dirty);
1173
1174     if (block) {
1175         /*
1176          * As soon as we start servicing pages out of order, then we have
1177          * to kill the bulk stage, since the bulk stage assumes
1178          * in (migration_bitmap_find_and_reset_dirty) that every page is
1179          * dirty, that's no longer true.
1180          */
1181         rs->ram_bulk_stage = false;
1182
1183         /*
1184          * We want the background search to continue from the queued page
1185          * since the guest is likely to want other pages near to the page
1186          * it just requested.
1187          */
1188         pss->block = block;
1189         pss->page = offset >> TARGET_PAGE_BITS;
1190     }
1191
1192     return !!block;
1193 }
1194
1195 /**
1196  * migration_page_queue_free: drop any remaining pages in the ram
1197  * request queue
1198  *
1199  * It should be empty at the end anyway, but in error cases there may
1200  * be some left.  in case that there is any page left, we drop it.
1201  *
1202  */
1203 void migration_page_queue_free(void)
1204 {
1205     struct RAMSrcPageRequest *mspr, *next_mspr;
1206     RAMState *rs = &ram_state;
1207     /* This queue generally should be empty - but in the case of a failed
1208      * migration might have some droppings in.
1209      */
1210     rcu_read_lock();
1211     QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1212         memory_region_unref(mspr->rb->mr);
1213         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1214         g_free(mspr);
1215     }
1216     rcu_read_unlock();
1217 }
1218
1219 /**
1220  * ram_save_queue_pages: queue the page for transmission
1221  *
1222  * A request from postcopy destination for example.
1223  *
1224  * Returns zero on success or negative on error
1225  *
1226  * @rbname: Name of the RAMBLock of the request. NULL means the
1227  *          same that last one.
1228  * @start: starting address from the start of the RAMBlock
1229  * @len: length (in bytes) to send
1230  */
1231 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1232 {
1233     RAMBlock *ramblock;
1234     RAMState *rs = &ram_state;
1235
1236     rs->postcopy_requests++;
1237     rcu_read_lock();
1238     if (!rbname) {
1239         /* Reuse last RAMBlock */
1240         ramblock = rs->last_req_rb;
1241
1242         if (!ramblock) {
1243             /*
1244              * Shouldn't happen, we can't reuse the last RAMBlock if
1245              * it's the 1st request.
1246              */
1247             error_report("ram_save_queue_pages no previous block");
1248             goto err;
1249         }
1250     } else {
1251         ramblock = qemu_ram_block_by_name(rbname);
1252
1253         if (!ramblock) {
1254             /* We shouldn't be asked for a non-existent RAMBlock */
1255             error_report("ram_save_queue_pages no block '%s'", rbname);
1256             goto err;
1257         }
1258         rs->last_req_rb = ramblock;
1259     }
1260     trace_ram_save_queue_pages(ramblock->idstr, start, len);
1261     if (start+len > ramblock->used_length) {
1262         error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1263                      RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1264                      __func__, start, len, ramblock->used_length);
1265         goto err;
1266     }
1267
1268     struct RAMSrcPageRequest *new_entry =
1269         g_malloc0(sizeof(struct RAMSrcPageRequest));
1270     new_entry->rb = ramblock;
1271     new_entry->offset = start;
1272     new_entry->len = len;
1273
1274     memory_region_ref(ramblock->mr);
1275     qemu_mutex_lock(&rs->src_page_req_mutex);
1276     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1277     qemu_mutex_unlock(&rs->src_page_req_mutex);
1278     rcu_read_unlock();
1279
1280     return 0;
1281
1282 err:
1283     rcu_read_unlock();
1284     return -1;
1285 }
1286
1287 /**
1288  * ram_save_target_page: save one target page
1289  *
1290  * Returns the number of pages written
1291  *
1292  * @rs: current RAM state
1293  * @ms: current migration state
1294  * @pss: data about the page we want to send
1295  * @last_stage: if we are at the completion stage
1296  */
1297 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1298                                 bool last_stage)
1299 {
1300     int res = 0;
1301
1302     /* Check the pages is dirty and if it is send it */
1303     if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1304         unsigned long *unsentmap;
1305         /*
1306          * If xbzrle is on, stop using the data compression after first
1307          * round of migration even if compression is enabled. In theory,
1308          * xbzrle can do better than compression.
1309          */
1310         unsigned long page =
1311             (pss->block->offset >> TARGET_PAGE_BITS) + pss->page;
1312         if (migrate_use_compression()
1313             && (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1314             res = ram_save_compressed_page(rs, pss, last_stage);
1315         } else {
1316             res = ram_save_page(rs, pss, last_stage);
1317         }
1318
1319         if (res < 0) {
1320             return res;
1321         }
1322         unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1323         if (unsentmap) {
1324             clear_bit(page, unsentmap);
1325         }
1326     }
1327
1328     return res;
1329 }
1330
1331 /**
1332  * ram_save_host_page: save a whole host page
1333  *
1334  * Starting at *offset send pages up to the end of the current host
1335  * page. It's valid for the initial offset to point into the middle of
1336  * a host page in which case the remainder of the hostpage is sent.
1337  * Only dirty target pages are sent. Note that the host page size may
1338  * be a huge page for this block.
1339  *
1340  * Returns the number of pages written or negative on error
1341  *
1342  * @rs: current RAM state
1343  * @ms: current migration state
1344  * @pss: data about the page we want to send
1345  * @last_stage: if we are at the completion stage
1346  */
1347 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1348                               bool last_stage)
1349 {
1350     int tmppages, pages = 0;
1351     size_t pagesize_bits =
1352         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1353
1354     do {
1355         tmppages = ram_save_target_page(rs, pss, last_stage);
1356         if (tmppages < 0) {
1357             return tmppages;
1358         }
1359
1360         pages += tmppages;
1361         pss->page++;
1362     } while (pss->page & (pagesize_bits - 1));
1363
1364     /* The offset we leave with is the last one we looked at */
1365     pss->page--;
1366     return pages;
1367 }
1368
1369 /**
1370  * ram_find_and_save_block: finds a dirty page and sends it to f
1371  *
1372  * Called within an RCU critical section.
1373  *
1374  * Returns the number of pages written where zero means no dirty pages
1375  *
1376  * @rs: current RAM state
1377  * @last_stage: if we are at the completion stage
1378  *
1379  * On systems where host-page-size > target-page-size it will send all the
1380  * pages in a host page that are dirty.
1381  */
1382
1383 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1384 {
1385     PageSearchStatus pss;
1386     int pages = 0;
1387     bool again, found;
1388
1389     /* No dirty page as there is zero RAM */
1390     if (!ram_bytes_total()) {
1391         return pages;
1392     }
1393
1394     pss.block = rs->last_seen_block;
1395     pss.page = rs->last_page;
1396     pss.complete_round = false;
1397
1398     if (!pss.block) {
1399         pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1400     }
1401
1402     do {
1403         again = true;
1404         found = get_queued_page(rs, &pss);
1405
1406         if (!found) {
1407             /* priority queue empty, so just search for something dirty */
1408             found = find_dirty_block(rs, &pss, &again);
1409         }
1410
1411         if (found) {
1412             pages = ram_save_host_page(rs, &pss, last_stage);
1413         }
1414     } while (!pages && again);
1415
1416     rs->last_seen_block = pss.block;
1417     rs->last_page = pss.page;
1418
1419     return pages;
1420 }
1421
1422 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1423 {
1424     uint64_t pages = size / TARGET_PAGE_SIZE;
1425     RAMState *rs = &ram_state;
1426
1427     if (zero) {
1428         rs->zero_pages += pages;
1429     } else {
1430         rs->norm_pages += pages;
1431         rs->bytes_transferred += size;
1432         qemu_update_position(f, size);
1433     }
1434 }
1435
1436 uint64_t ram_bytes_total(void)
1437 {
1438     RAMBlock *block;
1439     uint64_t total = 0;
1440
1441     rcu_read_lock();
1442     QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1443         total += block->used_length;
1444     rcu_read_unlock();
1445     return total;
1446 }
1447
1448 void free_xbzrle_decoded_buf(void)
1449 {
1450     g_free(xbzrle_decoded_buf);
1451     xbzrle_decoded_buf = NULL;
1452 }
1453
1454 static void migration_bitmap_free(RAMBitmap *bmap)
1455 {
1456     g_free(bmap->bmap);
1457     g_free(bmap->unsentmap);
1458     g_free(bmap);
1459 }
1460
1461 static void ram_migration_cleanup(void *opaque)
1462 {
1463     RAMState *rs = opaque;
1464
1465     /* caller have hold iothread lock or is in a bh, so there is
1466      * no writing race against this migration_bitmap
1467      */
1468     RAMBitmap *bitmap = rs->ram_bitmap;
1469     atomic_rcu_set(&rs->ram_bitmap, NULL);
1470     if (bitmap) {
1471         memory_global_dirty_log_stop();
1472         call_rcu(bitmap, migration_bitmap_free, rcu);
1473     }
1474
1475     XBZRLE_cache_lock();
1476     if (XBZRLE.cache) {
1477         cache_fini(XBZRLE.cache);
1478         g_free(XBZRLE.encoded_buf);
1479         g_free(XBZRLE.current_buf);
1480         g_free(ZERO_TARGET_PAGE);
1481         XBZRLE.cache = NULL;
1482         XBZRLE.encoded_buf = NULL;
1483         XBZRLE.current_buf = NULL;
1484     }
1485     XBZRLE_cache_unlock();
1486 }
1487
1488 static void ram_state_reset(RAMState *rs)
1489 {
1490     rs->last_seen_block = NULL;
1491     rs->last_sent_block = NULL;
1492     rs->last_page = 0;
1493     rs->last_version = ram_list.version;
1494     rs->ram_bulk_stage = true;
1495 }
1496
1497 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1498
1499 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1500 {
1501     RAMState *rs = &ram_state;
1502
1503     /* called in qemu main thread, so there is
1504      * no writing race against this migration_bitmap
1505      */
1506     if (rs->ram_bitmap) {
1507         RAMBitmap *old_bitmap = rs->ram_bitmap, *bitmap;
1508         bitmap = g_new(RAMBitmap, 1);
1509         bitmap->bmap = bitmap_new(new);
1510
1511         /* prevent migration_bitmap content from being set bit
1512          * by migration_bitmap_sync_range() at the same time.
1513          * it is safe to migration if migration_bitmap is cleared bit
1514          * at the same time.
1515          */
1516         qemu_mutex_lock(&rs->bitmap_mutex);
1517         bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1518         bitmap_set(bitmap->bmap, old, new - old);
1519
1520         /* We don't have a way to safely extend the sentmap
1521          * with RCU; so mark it as missing, entry to postcopy
1522          * will fail.
1523          */
1524         bitmap->unsentmap = NULL;
1525
1526         atomic_rcu_set(&rs->ram_bitmap, bitmap);
1527         qemu_mutex_unlock(&rs->bitmap_mutex);
1528         rs->migration_dirty_pages += new - old;
1529         call_rcu(old_bitmap, migration_bitmap_free, rcu);
1530     }
1531 }
1532
1533 /*
1534  * 'expected' is the value you expect the bitmap mostly to be full
1535  * of; it won't bother printing lines that are all this value.
1536  * If 'todump' is null the migration bitmap is dumped.
1537  */
1538 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1539 {
1540     unsigned long ram_pages = last_ram_page();
1541     RAMState *rs = &ram_state;
1542     int64_t cur;
1543     int64_t linelen = 128;
1544     char linebuf[129];
1545
1546     if (!todump) {
1547         todump = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1548     }
1549
1550     for (cur = 0; cur < ram_pages; cur += linelen) {
1551         int64_t curb;
1552         bool found = false;
1553         /*
1554          * Last line; catch the case where the line length
1555          * is longer than remaining ram
1556          */
1557         if (cur + linelen > ram_pages) {
1558             linelen = ram_pages - cur;
1559         }
1560         for (curb = 0; curb < linelen; curb++) {
1561             bool thisbit = test_bit(cur + curb, todump);
1562             linebuf[curb] = thisbit ? '1' : '.';
1563             found = found || (thisbit != expected);
1564         }
1565         if (found) {
1566             linebuf[curb] = '\0';
1567             fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
1568         }
1569     }
1570 }
1571
1572 /* **** functions for postcopy ***** */
1573
1574 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1575 {
1576     RAMState *rs = &ram_state;
1577     struct RAMBlock *block;
1578     unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1579
1580     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1581         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1582         unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1583         unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1584
1585         while (run_start < range) {
1586             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1587             ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
1588                               (run_end - run_start) << TARGET_PAGE_BITS);
1589             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1590         }
1591     }
1592 }
1593
1594 /**
1595  * postcopy_send_discard_bm_ram: discard a RAMBlock
1596  *
1597  * Returns zero on success
1598  *
1599  * Callback from postcopy_each_ram_send_discard for each RAMBlock
1600  * Note: At this point the 'unsentmap' is the processed bitmap combined
1601  *       with the dirtymap; so a '1' means it's either dirty or unsent.
1602  *
1603  * @ms: current migration state
1604  * @pds: state for postcopy
1605  * @start: RAMBlock starting page
1606  * @length: RAMBlock size
1607  */
1608 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1609                                         PostcopyDiscardState *pds,
1610                                         unsigned long start,
1611                                         unsigned long length)
1612 {
1613     RAMState *rs = &ram_state;
1614     unsigned long end = start + length; /* one after the end */
1615     unsigned long current;
1616     unsigned long *unsentmap;
1617
1618     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1619     for (current = start; current < end; ) {
1620         unsigned long one = find_next_bit(unsentmap, end, current);
1621
1622         if (one <= end) {
1623             unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1624             unsigned long discard_length;
1625
1626             if (zero >= end) {
1627                 discard_length = end - one;
1628             } else {
1629                 discard_length = zero - one;
1630             }
1631             if (discard_length) {
1632                 postcopy_discard_send_range(ms, pds, one, discard_length);
1633             }
1634             current = one + discard_length;
1635         } else {
1636             current = one;
1637         }
1638     }
1639
1640     return 0;
1641 }
1642
1643 /**
1644  * postcopy_each_ram_send_discard: discard all RAMBlocks
1645  *
1646  * Returns 0 for success or negative for error
1647  *
1648  * Utility for the outgoing postcopy code.
1649  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
1650  *   passing it bitmap indexes and name.
1651  * (qemu_ram_foreach_block ends up passing unscaled lengths
1652  *  which would mean postcopy code would have to deal with target page)
1653  *
1654  * @ms: current migration state
1655  */
1656 static int postcopy_each_ram_send_discard(MigrationState *ms)
1657 {
1658     struct RAMBlock *block;
1659     int ret;
1660
1661     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1662         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1663         PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1664                                                                first,
1665                                                                block->idstr);
1666
1667         /*
1668          * Postcopy sends chunks of bitmap over the wire, but it
1669          * just needs indexes at this point, avoids it having
1670          * target page specific code.
1671          */
1672         ret = postcopy_send_discard_bm_ram(ms, pds, first,
1673                                     block->used_length >> TARGET_PAGE_BITS);
1674         postcopy_discard_send_finish(ms, pds);
1675         if (ret) {
1676             return ret;
1677         }
1678     }
1679
1680     return 0;
1681 }
1682
1683 /**
1684  * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
1685  *
1686  * Helper for postcopy_chunk_hostpages; it's called twice to
1687  * canonicalize the two bitmaps, that are similar, but one is
1688  * inverted.
1689  *
1690  * Postcopy requires that all target pages in a hostpage are dirty or
1691  * clean, not a mix.  This function canonicalizes the bitmaps.
1692  *
1693  * @ms: current migration state
1694  * @unsent_pass: if true we need to canonicalize partially unsent host pages
1695  *               otherwise we need to canonicalize partially dirty host pages
1696  * @block: block that contains the page we want to canonicalize
1697  * @pds: state for postcopy
1698  */
1699 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1700                                           RAMBlock *block,
1701                                           PostcopyDiscardState *pds)
1702 {
1703     RAMState *rs = &ram_state;
1704     unsigned long *bitmap;
1705     unsigned long *unsentmap;
1706     unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1707     unsigned long first = block->offset >> TARGET_PAGE_BITS;
1708     unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1709     unsigned long last = first + (len - 1);
1710     unsigned long run_start;
1711
1712     if (block->page_size == TARGET_PAGE_SIZE) {
1713         /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1714         return;
1715     }
1716
1717     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1718     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1719
1720     if (unsent_pass) {
1721         /* Find a sent page */
1722         run_start = find_next_zero_bit(unsentmap, last + 1, first);
1723     } else {
1724         /* Find a dirty page */
1725         run_start = find_next_bit(bitmap, last + 1, first);
1726     }
1727
1728     while (run_start <= last) {
1729         bool do_fixup = false;
1730         unsigned long fixup_start_addr;
1731         unsigned long host_offset;
1732
1733         /*
1734          * If the start of this run of pages is in the middle of a host
1735          * page, then we need to fixup this host page.
1736          */
1737         host_offset = run_start % host_ratio;
1738         if (host_offset) {
1739             do_fixup = true;
1740             run_start -= host_offset;
1741             fixup_start_addr = run_start;
1742             /* For the next pass */
1743             run_start = run_start + host_ratio;
1744         } else {
1745             /* Find the end of this run */
1746             unsigned long run_end;
1747             if (unsent_pass) {
1748                 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1749             } else {
1750                 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1751             }
1752             /*
1753              * If the end isn't at the start of a host page, then the
1754              * run doesn't finish at the end of a host page
1755              * and we need to discard.
1756              */
1757             host_offset = run_end % host_ratio;
1758             if (host_offset) {
1759                 do_fixup = true;
1760                 fixup_start_addr = run_end - host_offset;
1761                 /*
1762                  * This host page has gone, the next loop iteration starts
1763                  * from after the fixup
1764                  */
1765                 run_start = fixup_start_addr + host_ratio;
1766             } else {
1767                 /*
1768                  * No discards on this iteration, next loop starts from
1769                  * next sent/dirty page
1770                  */
1771                 run_start = run_end + 1;
1772             }
1773         }
1774
1775         if (do_fixup) {
1776             unsigned long page;
1777
1778             /* Tell the destination to discard this page */
1779             if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1780                 /* For the unsent_pass we:
1781                  *     discard partially sent pages
1782                  * For the !unsent_pass (dirty) we:
1783                  *     discard partially dirty pages that were sent
1784                  *     (any partially sent pages were already discarded
1785                  *     by the previous unsent_pass)
1786                  */
1787                 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1788                                             host_ratio);
1789             }
1790
1791             /* Clean up the bitmap */
1792             for (page = fixup_start_addr;
1793                  page < fixup_start_addr + host_ratio; page++) {
1794                 /* All pages in this host page are now not sent */
1795                 set_bit(page, unsentmap);
1796
1797                 /*
1798                  * Remark them as dirty, updating the count for any pages
1799                  * that weren't previously dirty.
1800                  */
1801                 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
1802             }
1803         }
1804
1805         if (unsent_pass) {
1806             /* Find the next sent page for the next iteration */
1807             run_start = find_next_zero_bit(unsentmap, last + 1,
1808                                            run_start);
1809         } else {
1810             /* Find the next dirty page for the next iteration */
1811             run_start = find_next_bit(bitmap, last + 1, run_start);
1812         }
1813     }
1814 }
1815
1816 /**
1817  * postcopy_chuck_hostpages: discrad any partially sent host page
1818  *
1819  * Utility for the outgoing postcopy code.
1820  *
1821  * Discard any partially sent host-page size chunks, mark any partially
1822  * dirty host-page size chunks as all dirty.  In this case the host-page
1823  * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1824  *
1825  * Returns zero on success
1826  *
1827  * @ms: current migration state
1828  */
1829 static int postcopy_chunk_hostpages(MigrationState *ms)
1830 {
1831     RAMState *rs = &ram_state;
1832     struct RAMBlock *block;
1833
1834     /* Easiest way to make sure we don't resume in the middle of a host-page */
1835     rs->last_seen_block = NULL;
1836     rs->last_sent_block = NULL;
1837     rs->last_page = 0;
1838
1839     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1840         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1841
1842         PostcopyDiscardState *pds =
1843                          postcopy_discard_send_init(ms, first, block->idstr);
1844
1845         /* First pass: Discard all partially sent host pages */
1846         postcopy_chunk_hostpages_pass(ms, true, block, pds);
1847         /*
1848          * Second pass: Ensure that all partially dirty host pages are made
1849          * fully dirty.
1850          */
1851         postcopy_chunk_hostpages_pass(ms, false, block, pds);
1852
1853         postcopy_discard_send_finish(ms, pds);
1854     } /* ram_list loop */
1855
1856     return 0;
1857 }
1858
1859 /**
1860  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1861  *
1862  * Returns zero on success
1863  *
1864  * Transmit the set of pages to be discarded after precopy to the target
1865  * these are pages that:
1866  *     a) Have been previously transmitted but are now dirty again
1867  *     b) Pages that have never been transmitted, this ensures that
1868  *        any pages on the destination that have been mapped by background
1869  *        tasks get discarded (transparent huge pages is the specific concern)
1870  * Hopefully this is pretty sparse
1871  *
1872  * @ms: current migration state
1873  */
1874 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1875 {
1876     RAMState *rs = &ram_state;
1877     int ret;
1878     unsigned long *bitmap, *unsentmap;
1879
1880     rcu_read_lock();
1881
1882     /* This should be our last sync, the src is now paused */
1883     migration_bitmap_sync(rs);
1884
1885     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1886     if (!unsentmap) {
1887         /* We don't have a safe way to resize the sentmap, so
1888          * if the bitmap was resized it will be NULL at this
1889          * point.
1890          */
1891         error_report("migration ram resized during precopy phase");
1892         rcu_read_unlock();
1893         return -EINVAL;
1894     }
1895
1896     /* Deal with TPS != HPS and huge pages */
1897     ret = postcopy_chunk_hostpages(ms);
1898     if (ret) {
1899         rcu_read_unlock();
1900         return ret;
1901     }
1902
1903     /*
1904      * Update the unsentmap to be unsentmap = unsentmap | dirty
1905      */
1906     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1907     bitmap_or(unsentmap, unsentmap, bitmap, last_ram_page());
1908
1909
1910     trace_ram_postcopy_send_discard_bitmap();
1911 #ifdef DEBUG_POSTCOPY
1912     ram_debug_dump_bitmap(unsentmap, true);
1913 #endif
1914
1915     ret = postcopy_each_ram_send_discard(ms);
1916     rcu_read_unlock();
1917
1918     return ret;
1919 }
1920
1921 /**
1922  * ram_discard_range: discard dirtied pages at the beginning of postcopy
1923  *
1924  * Returns zero on success
1925  *
1926  * @rbname: name of the RAMBlock of the request. NULL means the
1927  *          same that last one.
1928  * @start: RAMBlock starting page
1929  * @length: RAMBlock size
1930  */
1931 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
1932 {
1933     int ret = -1;
1934
1935     trace_ram_discard_range(rbname, start, length);
1936
1937     rcu_read_lock();
1938     RAMBlock *rb = qemu_ram_block_by_name(rbname);
1939
1940     if (!rb) {
1941         error_report("ram_discard_range: Failed to find block '%s'", rbname);
1942         goto err;
1943     }
1944
1945     ret = ram_block_discard_range(rb, start, length);
1946
1947 err:
1948     rcu_read_unlock();
1949
1950     return ret;
1951 }
1952
1953 static int ram_state_init(RAMState *rs)
1954 {
1955     unsigned long ram_bitmap_pages;
1956
1957     memset(rs, 0, sizeof(*rs));
1958     qemu_mutex_init(&rs->bitmap_mutex);
1959     qemu_mutex_init(&rs->src_page_req_mutex);
1960     QSIMPLEQ_INIT(&rs->src_page_requests);
1961
1962     if (migrate_use_xbzrle()) {
1963         XBZRLE_cache_lock();
1964         ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1965         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1966                                   TARGET_PAGE_SIZE,
1967                                   TARGET_PAGE_SIZE);
1968         if (!XBZRLE.cache) {
1969             XBZRLE_cache_unlock();
1970             error_report("Error creating cache");
1971             return -1;
1972         }
1973         XBZRLE_cache_unlock();
1974
1975         /* We prefer not to abort if there is no memory */
1976         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1977         if (!XBZRLE.encoded_buf) {
1978             error_report("Error allocating encoded_buf");
1979             return -1;
1980         }
1981
1982         XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1983         if (!XBZRLE.current_buf) {
1984             error_report("Error allocating current_buf");
1985             g_free(XBZRLE.encoded_buf);
1986             XBZRLE.encoded_buf = NULL;
1987             return -1;
1988         }
1989     }
1990
1991     /* For memory_global_dirty_log_start below.  */
1992     qemu_mutex_lock_iothread();
1993
1994     qemu_mutex_lock_ramlist();
1995     rcu_read_lock();
1996     ram_state_reset(rs);
1997
1998     rs->ram_bitmap = g_new0(RAMBitmap, 1);
1999     /* Skip setting bitmap if there is no RAM */
2000     if (ram_bytes_total()) {
2001         ram_bitmap_pages = last_ram_page();
2002         rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages);
2003         bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages);
2004
2005         if (migrate_postcopy_ram()) {
2006             rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages);
2007             bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages);
2008         }
2009     }
2010
2011     /*
2012      * Count the total number of pages used by ram blocks not including any
2013      * gaps due to alignment or unplugs.
2014      */
2015     rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2016
2017     memory_global_dirty_log_start();
2018     migration_bitmap_sync(rs);
2019     qemu_mutex_unlock_ramlist();
2020     qemu_mutex_unlock_iothread();
2021     rcu_read_unlock();
2022
2023     return 0;
2024 }
2025
/*
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * a long-running RCU critical section.  When RCU reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 */
2032
2033 /**
2034  * ram_save_setup: Setup RAM for migration
2035  *
2036  * Returns zero to indicate success and negative for error
2037  *
2038  * @f: QEMUFile where to send the data
2039  * @opaque: RAMState pointer
2040  */
2041 static int ram_save_setup(QEMUFile *f, void *opaque)
2042 {
2043     RAMState *rs = opaque;
2044     RAMBlock *block;
2045
2046     /* migration has already setup the bitmap, reuse it. */
2047     if (!migration_in_colo_state()) {
2048         if (ram_state_init(rs) < 0) {
2049             return -1;
2050          }
2051     }
2052     rs->f = f;
2053
2054     rcu_read_lock();
2055
2056     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2057
2058     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2059         qemu_put_byte(f, strlen(block->idstr));
2060         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2061         qemu_put_be64(f, block->used_length);
2062         if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2063             qemu_put_be64(f, block->page_size);
2064         }
2065     }
2066
2067     rcu_read_unlock();
2068
2069     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2070     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2071
2072     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2073
2074     return 0;
2075 }
2076
2077 /**
2078  * ram_save_iterate: iterative stage for migration
2079  *
2080  * Returns zero to indicate success and negative for error
2081  *
2082  * @f: QEMUFile where to send the data
2083  * @opaque: RAMState pointer
2084  */
2085 static int ram_save_iterate(QEMUFile *f, void *opaque)
2086 {
2087     RAMState *rs = opaque;
2088     int ret;
2089     int i;
2090     int64_t t0;
2091     int done = 0;
2092
2093     rcu_read_lock();
2094     if (ram_list.version != rs->last_version) {
2095         ram_state_reset(rs);
2096     }
2097
2098     /* Read version before ram_list.blocks */
2099     smp_rmb();
2100
2101     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2102
2103     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2104     i = 0;
2105     while ((ret = qemu_file_rate_limit(f)) == 0) {
2106         int pages;
2107
2108         pages = ram_find_and_save_block(rs, false);
2109         /* no more pages to sent */
2110         if (pages == 0) {
2111             done = 1;
2112             break;
2113         }
2114         rs->iterations++;
2115
2116         /* we want to check in the 1st loop, just in case it was the 1st time
2117            and we had to sync the dirty bitmap.
2118            qemu_get_clock_ns() is a bit expensive, so we only check each some
2119            iterations
2120         */
2121         if ((i & 63) == 0) {
2122             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2123             if (t1 > MAX_WAIT) {
2124                 trace_ram_save_iterate_big_wait(t1, i);
2125                 break;
2126             }
2127         }
2128         i++;
2129     }
2130     flush_compressed_data(rs);
2131     rcu_read_unlock();
2132
2133     /*
2134      * Must occur before EOS (or any QEMUFile operation)
2135      * because of RDMA protocol.
2136      */
2137     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2138
2139     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2140     rs->bytes_transferred += 8;
2141
2142     ret = qemu_file_get_error(f);
2143     if (ret < 0) {
2144         return ret;
2145     }
2146
2147     return done;
2148 }
2149
2150 /**
2151  * ram_save_complete: function called to send the remaining amount of ram
2152  *
2153  * Returns zero to indicate success
2154  *
2155  * Called with iothread lock
2156  *
2157  * @f: QEMUFile where to send the data
2158  * @opaque: RAMState pointer
2159  */
2160 static int ram_save_complete(QEMUFile *f, void *opaque)
2161 {
2162     RAMState *rs = opaque;
2163
2164     rcu_read_lock();
2165
2166     if (!migration_in_postcopy()) {
2167         migration_bitmap_sync(rs);
2168     }
2169
2170     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2171
2172     /* try transferring iterative blocks of memory */
2173
2174     /* flush all remaining blocks regardless of rate limiting */
2175     while (true) {
2176         int pages;
2177
2178         pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2179         /* no more blocks to sent */
2180         if (pages == 0) {
2181             break;
2182         }
2183     }
2184
2185     flush_compressed_data(rs);
2186     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2187
2188     rcu_read_unlock();
2189
2190     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2191
2192     return 0;
2193 }
2194
2195 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2196                              uint64_t *non_postcopiable_pending,
2197                              uint64_t *postcopiable_pending)
2198 {
2199     RAMState *rs = opaque;
2200     uint64_t remaining_size;
2201
2202     remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2203
2204     if (!migration_in_postcopy() &&
2205         remaining_size < max_size) {
2206         qemu_mutex_lock_iothread();
2207         rcu_read_lock();
2208         migration_bitmap_sync(rs);
2209         rcu_read_unlock();
2210         qemu_mutex_unlock_iothread();
2211         remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2212     }
2213
2214     /* We can do postcopy, and all the data is postcopiable */
2215     *postcopiable_pending += remaining_size;
2216 }
2217
2218 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2219 {
2220     unsigned int xh_len;
2221     int xh_flags;
2222     uint8_t *loaded_data;
2223
2224     if (!xbzrle_decoded_buf) {
2225         xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2226     }
2227     loaded_data = xbzrle_decoded_buf;
2228
2229     /* extract RLE header */
2230     xh_flags = qemu_get_byte(f);
2231     xh_len = qemu_get_be16(f);
2232
2233     if (xh_flags != ENCODING_FLAG_XBZRLE) {
2234         error_report("Failed to load XBZRLE page - wrong compression!");
2235         return -1;
2236     }
2237
2238     if (xh_len > TARGET_PAGE_SIZE) {
2239         error_report("Failed to load XBZRLE page - len overflow!");
2240         return -1;
2241     }
2242     /* load data and decode */
2243     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2244
2245     /* decode RLE */
2246     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2247                              TARGET_PAGE_SIZE) == -1) {
2248         error_report("Failed to load XBZRLE page - decode error!");
2249         return -1;
2250     }
2251
2252     return 0;
2253 }
2254
2255 /**
2256  * ram_block_from_stream: read a RAMBlock id from the migration stream
2257  *
2258  * Must be called from within a rcu critical section.
2259  *
2260  * Returns a pointer from within the RCU-protected ram_list.
2261  *
2262  * @f: QEMUFile where to read the data from
2263  * @flags: Page flags (mostly to see if it's a continuation of previous block)
2264  */
2265 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2266 {
2267     static RAMBlock *block = NULL;
2268     char id[256];
2269     uint8_t len;
2270
2271     if (flags & RAM_SAVE_FLAG_CONTINUE) {
2272         if (!block) {
2273             error_report("Ack, bad migration stream!");
2274             return NULL;
2275         }
2276         return block;
2277     }
2278
2279     len = qemu_get_byte(f);
2280     qemu_get_buffer(f, (uint8_t *)id, len);
2281     id[len] = 0;
2282
2283     block = qemu_ram_block_by_name(id);
2284     if (!block) {
2285         error_report("Can't find block %s", id);
2286         return NULL;
2287     }
2288
2289     return block;
2290 }
2291
2292 static inline void *host_from_ram_block_offset(RAMBlock *block,
2293                                                ram_addr_t offset)
2294 {
2295     if (!offset_in_ramblock(block, offset)) {
2296         return NULL;
2297     }
2298
2299     return block->host + offset;
2300 }
2301
/**
 * ram_handle_compressed: handle the zero page case
 *
 * Fills @size bytes at @host with @ch.  The write is skipped when @ch is
 * zero and the range is already all zero, so a page (or a whole RDMA
 * chunk) that is known to be zero is not touched needlessly.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* already zero, nothing to write */
        return;
    }

    memset(host, ch, size);
}
2318
2319 static void *do_data_decompress(void *opaque)
2320 {
2321     DecompressParam *param = opaque;
2322     unsigned long pagesize;
2323     uint8_t *des;
2324     int len;
2325
2326     qemu_mutex_lock(&param->mutex);
2327     while (!param->quit) {
2328         if (param->des) {
2329             des = param->des;
2330             len = param->len;
2331             param->des = 0;
2332             qemu_mutex_unlock(&param->mutex);
2333
2334             pagesize = TARGET_PAGE_SIZE;
2335             /* uncompress() will return failed in some case, especially
2336              * when the page is dirted when doing the compression, it's
2337              * not a problem because the dirty page will be retransferred
2338              * and uncompress() won't break the data in other pages.
2339              */
2340             uncompress((Bytef *)des, &pagesize,
2341                        (const Bytef *)param->compbuf, len);
2342
2343             qemu_mutex_lock(&decomp_done_lock);
2344             param->done = true;
2345             qemu_cond_signal(&decomp_done_cond);
2346             qemu_mutex_unlock(&decomp_done_lock);
2347
2348             qemu_mutex_lock(&param->mutex);
2349         } else {
2350             qemu_cond_wait(&param->cond, &param->mutex);
2351         }
2352     }
2353     qemu_mutex_unlock(&param->mutex);
2354
2355     return NULL;
2356 }
2357
2358 static void wait_for_decompress_done(void)
2359 {
2360     int idx, thread_count;
2361
2362     if (!migrate_use_compression()) {
2363         return;
2364     }
2365
2366     thread_count = migrate_decompress_threads();
2367     qemu_mutex_lock(&decomp_done_lock);
2368     for (idx = 0; idx < thread_count; idx++) {
2369         while (!decomp_param[idx].done) {
2370             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2371         }
2372     }
2373     qemu_mutex_unlock(&decomp_done_lock);
2374 }
2375
2376 void migrate_decompress_threads_create(void)
2377 {
2378     int i, thread_count;
2379
2380     thread_count = migrate_decompress_threads();
2381     decompress_threads = g_new0(QemuThread, thread_count);
2382     decomp_param = g_new0(DecompressParam, thread_count);
2383     qemu_mutex_init(&decomp_done_lock);
2384     qemu_cond_init(&decomp_done_cond);
2385     for (i = 0; i < thread_count; i++) {
2386         qemu_mutex_init(&decomp_param[i].mutex);
2387         qemu_cond_init(&decomp_param[i].cond);
2388         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2389         decomp_param[i].done = true;
2390         decomp_param[i].quit = false;
2391         qemu_thread_create(decompress_threads + i, "decompress",
2392                            do_data_decompress, decomp_param + i,
2393                            QEMU_THREAD_JOINABLE);
2394     }
2395 }
2396
2397 void migrate_decompress_threads_join(void)
2398 {
2399     int i, thread_count;
2400
2401     thread_count = migrate_decompress_threads();
2402     for (i = 0; i < thread_count; i++) {
2403         qemu_mutex_lock(&decomp_param[i].mutex);
2404         decomp_param[i].quit = true;
2405         qemu_cond_signal(&decomp_param[i].cond);
2406         qemu_mutex_unlock(&decomp_param[i].mutex);
2407     }
2408     for (i = 0; i < thread_count; i++) {
2409         qemu_thread_join(decompress_threads + i);
2410         qemu_mutex_destroy(&decomp_param[i].mutex);
2411         qemu_cond_destroy(&decomp_param[i].cond);
2412         g_free(decomp_param[i].compbuf);
2413     }
2414     g_free(decompress_threads);
2415     g_free(decomp_param);
2416     decompress_threads = NULL;
2417     decomp_param = NULL;
2418 }
2419
2420 static void decompress_data_with_multi_threads(QEMUFile *f,
2421                                                void *host, int len)
2422 {
2423     int idx, thread_count;
2424
2425     thread_count = migrate_decompress_threads();
2426     qemu_mutex_lock(&decomp_done_lock);
2427     while (true) {
2428         for (idx = 0; idx < thread_count; idx++) {
2429             if (decomp_param[idx].done) {
2430                 decomp_param[idx].done = false;
2431                 qemu_mutex_lock(&decomp_param[idx].mutex);
2432                 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2433                 decomp_param[idx].des = host;
2434                 decomp_param[idx].len = len;
2435                 qemu_cond_signal(&decomp_param[idx].cond);
2436                 qemu_mutex_unlock(&decomp_param[idx].mutex);
2437                 break;
2438             }
2439         }
2440         if (idx < thread_count) {
2441             break;
2442         } else {
2443             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2444         }
2445     }
2446     qemu_mutex_unlock(&decomp_done_lock);
2447 }
2448
2449 /**
2450  * ram_postcopy_incoming_init: allocate postcopy data structures
2451  *
2452  * Returns 0 for success and negative if there was one error
2453  *
2454  * @mis: current migration incoming state
2455  *
2456  * Allocate data structures etc needed by incoming migration with
2457  * postcopy-ram. postcopy-ram's similarly names
2458  * postcopy_ram_incoming_init does the work.
2459  */
2460 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2461 {
2462     unsigned long ram_pages = last_ram_page();
2463
2464     return postcopy_ram_incoming_init(mis, ram_pages);
2465 }
2466
2467 /**
2468  * ram_load_postcopy: load a page in postcopy case
2469  *
2470  * Returns 0 for success or -errno in case of error
2471  *
2472  * Called in postcopy mode by ram_load().
2473  * rcu_read_lock is taken prior to this being called.
2474  *
2475  * @f: QEMUFile where to send the data
2476  */
2477 static int ram_load_postcopy(QEMUFile *f)
2478 {
2479     int flags = 0, ret = 0;
2480     bool place_needed = false;
2481     bool matching_page_sizes = false;
2482     MigrationIncomingState *mis = migration_incoming_get_current();
2483     /* Temporary page that is later 'placed' */
2484     void *postcopy_host_page = postcopy_get_tmp_page(mis);
2485     void *last_host = NULL;
2486     bool all_zero = false;
2487
2488     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2489         ram_addr_t addr;
2490         void *host = NULL;
2491         void *page_buffer = NULL;
2492         void *place_source = NULL;
2493         RAMBlock *block = NULL;
2494         uint8_t ch;
2495
2496         addr = qemu_get_be64(f);
2497         flags = addr & ~TARGET_PAGE_MASK;
2498         addr &= TARGET_PAGE_MASK;
2499
2500         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2501         place_needed = false;
2502         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2503             block = ram_block_from_stream(f, flags);
2504
2505             host = host_from_ram_block_offset(block, addr);
2506             if (!host) {
2507                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2508                 ret = -EINVAL;
2509                 break;
2510             }
2511             matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2512             /*
2513              * Postcopy requires that we place whole host pages atomically;
2514              * these may be huge pages for RAMBlocks that are backed by
2515              * hugetlbfs.
2516              * To make it atomic, the data is read into a temporary page
2517              * that's moved into place later.
2518              * The migration protocol uses,  possibly smaller, target-pages
2519              * however the source ensures it always sends all the components
2520              * of a host page in order.
2521              */
2522             page_buffer = postcopy_host_page +
2523                           ((uintptr_t)host & (block->page_size - 1));
2524             /* If all TP are zero then we can optimise the place */
2525             if (!((uintptr_t)host & (block->page_size - 1))) {
2526                 all_zero = true;
2527             } else {
2528                 /* not the 1st TP within the HP */
2529                 if (host != (last_host + TARGET_PAGE_SIZE)) {
2530                     error_report("Non-sequential target page %p/%p",
2531                                   host, last_host);
2532                     ret = -EINVAL;
2533                     break;
2534                 }
2535             }
2536
2537
2538             /*
2539              * If it's the last part of a host page then we place the host
2540              * page
2541              */
2542             place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2543                                      (block->page_size - 1)) == 0;
2544             place_source = postcopy_host_page;
2545         }
2546         last_host = host;
2547
2548         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2549         case RAM_SAVE_FLAG_COMPRESS:
2550             ch = qemu_get_byte(f);
2551             memset(page_buffer, ch, TARGET_PAGE_SIZE);
2552             if (ch) {
2553                 all_zero = false;
2554             }
2555             break;
2556
2557         case RAM_SAVE_FLAG_PAGE:
2558             all_zero = false;
2559             if (!place_needed || !matching_page_sizes) {
2560                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2561             } else {
2562                 /* Avoids the qemu_file copy during postcopy, which is
2563                  * going to do a copy later; can only do it when we
2564                  * do this read in one go (matching page sizes)
2565                  */
2566                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2567                                          TARGET_PAGE_SIZE);
2568             }
2569             break;
2570         case RAM_SAVE_FLAG_EOS:
2571             /* normal exit */
2572             break;
2573         default:
2574             error_report("Unknown combination of migration flags: %#x"
2575                          " (postcopy mode)", flags);
2576             ret = -EINVAL;
2577         }
2578
2579         if (place_needed) {
2580             /* This gets called at the last target page in the host page */
2581             void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2582
2583             if (all_zero) {
2584                 ret = postcopy_place_page_zero(mis, place_dest,
2585                                                block->page_size);
2586             } else {
2587                 ret = postcopy_place_page(mis, place_dest,
2588                                           place_source, block->page_size);
2589             }
2590         }
2591         if (!ret) {
2592             ret = qemu_file_get_error(f);
2593         }
2594     }
2595
2596     return ret;
2597 }
2598
2599 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2600 {
2601     int flags = 0, ret = 0;
2602     static uint64_t seq_iter;
2603     int len = 0;
2604     /*
2605      * If system is running in postcopy mode, page inserts to host memory must
2606      * be atomic
2607      */
2608     bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2609     /* ADVISE is earlier, it shows the source has the postcopy capability on */
2610     bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2611
2612     seq_iter++;
2613
2614     if (version_id != 4) {
2615         ret = -EINVAL;
2616     }
2617
2618     /* This RCU critical section can be very long running.
2619      * When RCU reclaims in the code start to become numerous,
2620      * it will be necessary to reduce the granularity of this
2621      * critical section.
2622      */
2623     rcu_read_lock();
2624
2625     if (postcopy_running) {
2626         ret = ram_load_postcopy(f);
2627     }
2628
2629     while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2630         ram_addr_t addr, total_ram_bytes;
2631         void *host = NULL;
2632         uint8_t ch;
2633
2634         addr = qemu_get_be64(f);
2635         flags = addr & ~TARGET_PAGE_MASK;
2636         addr &= TARGET_PAGE_MASK;
2637
2638         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2639                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2640             RAMBlock *block = ram_block_from_stream(f, flags);
2641
2642             host = host_from_ram_block_offset(block, addr);
2643             if (!host) {
2644                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2645                 ret = -EINVAL;
2646                 break;
2647             }
2648         }
2649
2650         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2651         case RAM_SAVE_FLAG_MEM_SIZE:
2652             /* Synchronize RAM block list */
2653             total_ram_bytes = addr;
2654             while (!ret && total_ram_bytes) {
2655                 RAMBlock *block;
2656                 char id[256];
2657                 ram_addr_t length;
2658
2659                 len = qemu_get_byte(f);
2660                 qemu_get_buffer(f, (uint8_t *)id, len);
2661                 id[len] = 0;
2662                 length = qemu_get_be64(f);
2663
2664                 block = qemu_ram_block_by_name(id);
2665                 if (block) {
2666                     if (length != block->used_length) {
2667                         Error *local_err = NULL;
2668
2669                         ret = qemu_ram_resize(block, length,
2670                                               &local_err);
2671                         if (local_err) {
2672                             error_report_err(local_err);
2673                         }
2674                     }
2675                     /* For postcopy we need to check hugepage sizes match */
2676                     if (postcopy_advised &&
2677                         block->page_size != qemu_host_page_size) {
2678                         uint64_t remote_page_size = qemu_get_be64(f);
2679                         if (remote_page_size != block->page_size) {
2680                             error_report("Mismatched RAM page size %s "
2681                                          "(local) %zd != %" PRId64,
2682                                          id, block->page_size,
2683                                          remote_page_size);
2684                             ret = -EINVAL;
2685                         }
2686                     }
2687                     ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2688                                           block->idstr);
2689                 } else {
2690                     error_report("Unknown ramblock \"%s\", cannot "
2691                                  "accept migration", id);
2692                     ret = -EINVAL;
2693                 }
2694
2695                 total_ram_bytes -= length;
2696             }
2697             break;
2698
2699         case RAM_SAVE_FLAG_COMPRESS:
2700             ch = qemu_get_byte(f);
2701             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2702             break;
2703
2704         case RAM_SAVE_FLAG_PAGE:
2705             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2706             break;
2707
2708         case RAM_SAVE_FLAG_COMPRESS_PAGE:
2709             len = qemu_get_be32(f);
2710             if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2711                 error_report("Invalid compressed data length: %d", len);
2712                 ret = -EINVAL;
2713                 break;
2714             }
2715             decompress_data_with_multi_threads(f, host, len);
2716             break;
2717
2718         case RAM_SAVE_FLAG_XBZRLE:
2719             if (load_xbzrle(f, addr, host) < 0) {
2720                 error_report("Failed to decompress XBZRLE page at "
2721                              RAM_ADDR_FMT, addr);
2722                 ret = -EINVAL;
2723                 break;
2724             }
2725             break;
2726         case RAM_SAVE_FLAG_EOS:
2727             /* normal exit */
2728             break;
2729         default:
2730             if (flags & RAM_SAVE_FLAG_HOOK) {
2731                 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2732             } else {
2733                 error_report("Unknown combination of migration flags: %#x",
2734                              flags);
2735                 ret = -EINVAL;
2736             }
2737         }
2738         if (!ret) {
2739             ret = qemu_file_get_error(f);
2740         }
2741     }
2742
2743     wait_for_decompress_done();
2744     rcu_read_unlock();
2745     trace_ram_load_complete(ret, seq_iter);
2746     return ret;
2747 }
2748
2749 static SaveVMHandlers savevm_ram_handlers = {
2750     .save_live_setup = ram_save_setup,
2751     .save_live_iterate = ram_save_iterate,
2752     .save_live_complete_postcopy = ram_save_complete,
2753     .save_live_complete_precopy = ram_save_complete,
2754     .save_live_pending = ram_save_pending,
2755     .load_state = ram_load,
2756     .cleanup = ram_migration_cleanup,
2757 };
2758
2759 void ram_mig_init(void)
2760 {
2761     qemu_mutex_init(&XBZRLE.lock);
2762     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
2763 }