/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
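
/*
 * Illustrative sketch, not part of the original file: every page that goes
 * on the wire is preceded by a big-endian 64-bit word that carries the page
 * offset inside its RAMBlock in the upper bits and a combination of the
 * RAM_SAVE_FLAG_* values in the low bits (see save_page_header() below).
 * The helper name example_page_header_word() is hypothetical and only
 * spells out that encoding.
 */
#if 0
static uint64_t example_page_header_word(ram_addr_t offset, bool same_block,
                                         bool is_zero)
{
    uint64_t header = offset;               /* page offset inside the block */

    if (same_block) {
        header |= RAM_SAVE_FLAG_CONTINUE;   /* no block name follows */
    }
    header |= is_zero ? RAM_SAVE_FLAG_COMPRESS  /* zero-page marker */
                      : RAM_SAVE_FLAG_PAGE;     /* raw page data follows */
    return header;
}
#endif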
static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}
/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns the new_size or negative in case of error.
 *
 * @new_size: new cache size
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}
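
/*
 * Usage sketch, not part of the original file: the cache size is rounded
 * down to a power of two of pages, so asking for 5 MiB on a 4 KiB-page
 * target ends up with a 4 MiB cache.  example_resize_xbzrle_cache() is a
 * hypothetical caller; the real entry point is qmp_migrate_set_cache_size().
 */
#if 0
static void example_resize_xbzrle_cache(void)
{
    int64_t got = xbzrle_cache_resize(5 * 1024 * 1024);

    if (got < 0) {
        error_report("resizing the XBZRLE cache failed");
    }
    /* otherwise got == pow2floor(5 MiB) == 4 MiB */
}
#endif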
/* State of RAM for migration */
struct RAMState {
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last offset we have sent data from */
    ram_addr_t last_offset;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* How many times we have synchronized the bitmap */
    uint64_t bitmap_sync_count;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Accounting fields */
    /* number of zero pages.  It used to be pages filled by the same char. */
    uint64_t zero_pages;
    /* number of normal transferred pages */
    uint64_t norm_pages;
    /* Iterations since start */
    uint64_t iterations;
};
typedef struct RAMState RAMState;

static RAMState ram_state;
/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}
uint64_t dup_mig_pages_transferred(void)
{
    return ram_state.zero_pages;
}

uint64_t norm_mig_pages_transferred(void)
{
    return ram_state.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}
static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;
struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}
/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 rs->bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1
/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(RAMState *rs, QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             rs->bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}
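
/*
 * Illustrative sketch, not part of the original file: an XBZRLE record on
 * the wire is the page header word, one sub-encoding flag byte, a big-endian
 * 16-bit payload length and then the encoded bytes; load_xbzrle() later in
 * this file parses exactly this layout.  The helper below is hypothetical
 * and only spells the framing out.
 */
#if 0
static void example_put_xbzrle_record(QEMUFile *f, RAMBlock *block,
                                      ram_addr_t offset,
                                      uint8_t *encoded, int encoded_len)
{
    save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);   /* 1 byte: sub-encoding flag */
    qemu_put_be16(f, encoded_len);            /* 2 bytes: payload length */
    qemu_put_buffer(f, encoded, encoded_len); /* the XBZRLE delta itself */
}
#endif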
/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Called with rcu_read_lock() to protect migration_bitmap
 *
 * Returns the byte offset within memory region of the start of a dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: starting address (typically so we can continue from previous page)
 * @ram_addr_abs: pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static inline
ram_addr_t migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                       ram_addr_t start,
                                       ram_addr_t *ram_addr_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (rs->ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *ram_addr_abs = next << TARGET_PAGE_BITS;
    return (next - base) << TARGET_PAGE_BITS;
}
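
/*
 * Illustrative sketch, hypothetical numbers, not part of the original file:
 * the migration bitmap is indexed by global page number, so RAMBlock offsets
 * are converted with TARGET_PAGE_BITS shifts in both directions, as
 * migration_bitmap_find_dirty() does above.
 */
#if 0
static void example_bitmap_indexing(void)
{
    ram_addr_t block_offset = 0x40000000;  /* made-up block->offset */
    ram_addr_t in_block = 0x2000;          /* 2 pages into the block (4 KiB) */

    unsigned long base = block_offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (in_block >> TARGET_PAGE_BITS);

    /* nr is the bit consulted in the bitmap; shifting it back by
     * TARGET_PAGE_BITS recovers the global ram_addr_t of the page. */
    ram_addr_t ram_addr_abs = (ram_addr_t)nr << TARGET_PAGE_BITS;
    (void)ram_addr_abs;
}
#endif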
static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    ret = test_and_clear_bit(nr, bitmap);

    if (ret) {
        migration_dirty_pages--;
    }
    return ret;
}

static void migration_bitmap_sync_range(RAMState *rs, ram_addr_t start,
                                        ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap,
                             start, length, &rs->num_dirty_pages_period);
}

static void migration_bitmap_sync_init(RAMState *rs)
{
    rs->time_last_bitmap_sync = 0;
    rs->bytes_xfer_prev = 0;
    rs->num_dirty_pages_period = 0;
    rs->xbzrle_cache_miss_prev = 0;
    rs->iterations_prev = 0;
}
/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        summary |= block->page_size;
    }

    return summary;
}
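
/*
 * Illustrative sketch, hypothetical values, not part of the original file:
 * with normal 4 KiB pages plus one RAMBlock backed by 2 MiB huge pages the
 * summary is simply the OR of both sizes, so both are visible at once.
 */
#if 0
static void example_pagesize_summary(void)
{
    uint64_t summary = 0;

    summary |= 4 * 1024;          /* 0x00001000 */
    summary |= 2 * 1024 * 1024;   /* 0x00200000 */
    /* summary == 0x00201000: both page sizes are represented */
}
#endif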
643 static void migration_bitmap_sync(RAMState
*rs
)
646 MigrationState
*s
= migrate_get_current();
648 uint64_t bytes_xfer_now
;
650 rs
->bitmap_sync_count
++;
652 if (!rs
->bytes_xfer_prev
) {
653 rs
->bytes_xfer_prev
= ram_bytes_transferred();
656 if (!rs
->time_last_bitmap_sync
) {
657 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
660 trace_migration_bitmap_sync_start();
661 memory_global_dirty_log_sync();
663 qemu_mutex_lock(&migration_bitmap_mutex
);
665 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
666 migration_bitmap_sync_range(rs
, block
->offset
, block
->used_length
);
669 qemu_mutex_unlock(&migration_bitmap_mutex
);
671 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
673 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
    /* more than 1 second = 1000 milliseconds */
676 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
677 if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */
683 bytes_xfer_now
= ram_bytes_transferred();
685 if (s
->dirty_pages_rate
&&
686 (rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
687 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
688 (rs
->dirty_rate_high_cnt
++ >= 2)) {
689 trace_migration_throttle();
690 rs
->dirty_rate_high_cnt
= 0;
691 mig_throttle_guest_down();
693 rs
->bytes_xfer_prev
= bytes_xfer_now
;
696 if (migrate_use_xbzrle()) {
697 if (rs
->iterations_prev
!= rs
->iterations
) {
698 acct_info
.xbzrle_cache_miss_rate
=
699 (double)(acct_info
.xbzrle_cache_miss
-
700 rs
->xbzrle_cache_miss_prev
) /
701 (rs
->iterations
- rs
->iterations_prev
);
703 rs
->iterations_prev
= rs
->iterations
;
704 rs
->xbzrle_cache_miss_prev
= acct_info
.xbzrle_cache_miss
;
706 s
->dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
707 / (end_time
- rs
->time_last_bitmap_sync
);
708 s
->dirty_bytes_rate
= s
->dirty_pages_rate
* TARGET_PAGE_SIZE
;
709 rs
->time_last_bitmap_sync
= end_time
;
710 rs
->num_dirty_pages_period
= 0;
712 s
->dirty_sync_count
= rs
->bitmap_sync_count
;
713 if (migrate_use_events()) {
714 qapi_event_send_migration_pass(rs
->bitmap_sync_count
, NULL
);
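
/*
 * Illustrative sketch, not part of the original file: the auto-converge
 * check in migration_bitmap_sync() compares the bytes dirtied during the
 * last period with the bytes actually transferred; dirtying more than half
 * of what was sent, a few periods in a row, starts or increases CPU
 * throttling.  The helper below is hypothetical and only restates that
 * condition.
 */
#if 0
static bool example_should_throttle(uint64_t dirtied_bytes,
                                    uint64_t transferred_bytes,
                                    int *high_cnt)
{
    if (dirtied_bytes > transferred_bytes / 2 && (*high_cnt)++ >= 2) {
        *high_cnt = 0;
        return true;    /* caller would invoke mig_throttle_guest_down() */
    }
    return false;
}
#endif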
/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(RAMState *rs, QEMUFile *f, RAMBlock *block,
                          ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        rs->zero_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

static void ram_release_pages(MigrationState *ms, const char *rbname,
                              uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
        return;
    }

    ram_discard_range(NULL, rbname, offset, pages << TARGET_PAGE_BITS);
}
759 * ram_save_page: send the given page to the stream
761 * Returns the number of pages written.
763 * >=0 - Number of pages written - this might legally be 0
764 * if xbzrle noticed the page was the same.
766 * @rs: current RAM state
767 * @ms: current migration state
768 * @f: QEMUFile where to send the data
769 * @block: block that contains the page we want to send
770 * @offset: offset inside the block for the page
771 * @last_stage: if we are at the completion stage
772 * @bytes_transferred: increase it with the number of transferred bytes
774 static int ram_save_page(RAMState
*rs
, MigrationState
*ms
, QEMUFile
*f
,
775 PageSearchStatus
*pss
, bool last_stage
,
776 uint64_t *bytes_transferred
)
780 ram_addr_t current_addr
;
783 bool send_async
= true;
784 RAMBlock
*block
= pss
->block
;
785 ram_addr_t offset
= pss
->offset
;
787 p
= block
->host
+ offset
;
    /* In doubt, send page as normal */
791 ret
= ram_control_save_page(f
, block
->offset
,
792 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
794 *bytes_transferred
+= bytes_xmit
;
800 current_addr
= block
->offset
+ offset
;
802 if (block
== rs
->last_sent_block
) {
803 offset
|= RAM_SAVE_FLAG_CONTINUE
;
805 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
806 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
807 if (bytes_xmit
> 0) {
809 } else if (bytes_xmit
== 0) {
814 pages
= save_zero_page(rs
, f
, block
, offset
, p
, bytes_transferred
);
816 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
817 * page would be stale
819 xbzrle_cache_zero_page(rs
, current_addr
);
820 ram_release_pages(ms
, block
->idstr
, pss
->offset
, pages
);
821 } else if (!rs
->ram_bulk_stage
&&
822 !migration_in_postcopy(ms
) && migrate_use_xbzrle()) {
823 pages
= save_xbzrle_page(rs
, f
, &p
, current_addr
, block
,
824 offset
, last_stage
, bytes_transferred
);
826 /* Can't send this cached data async, since the cache page
827 * might get updated before it gets to the wire
834 /* XBZRLE overflow or normal page */
836 *bytes_transferred
+= save_page_header(f
, block
,
837 offset
| RAM_SAVE_FLAG_PAGE
);
839 qemu_put_buffer_async(f
, p
, TARGET_PAGE_SIZE
,
840 migrate_release_ram() &
841 migration_in_postcopy(ms
));
843 qemu_put_buffer(f
, p
, TARGET_PAGE_SIZE
);
845 *bytes_transferred
+= TARGET_PAGE_SIZE
;
850 XBZRLE_cache_unlock();
855 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
858 int bytes_sent
, blen
;
859 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
861 bytes_sent
= save_page_header(f
, block
, offset
|
862 RAM_SAVE_FLAG_COMPRESS_PAGE
);
863 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
864 migrate_compress_level());
867 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
868 error_report("compressed data failed!");
871 ram_release_pages(migrate_get_current(), block
->idstr
,
872 offset
& TARGET_PAGE_MASK
, 1);
878 static uint64_t bytes_transferred
;
880 static void flush_compressed_data(QEMUFile
*f
)
882 int idx
, len
, thread_count
;
884 if (!migrate_use_compression()) {
887 thread_count
= migrate_compress_threads();
889 qemu_mutex_lock(&comp_done_lock
);
890 for (idx
= 0; idx
< thread_count
; idx
++) {
891 while (!comp_param
[idx
].done
) {
892 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
895 qemu_mutex_unlock(&comp_done_lock
);
897 for (idx
= 0; idx
< thread_count
; idx
++) {
898 qemu_mutex_lock(&comp_param
[idx
].mutex
);
899 if (!comp_param
[idx
].quit
) {
900 len
= qemu_put_qemu_file(f
, comp_param
[idx
].file
);
901 bytes_transferred
+= len
;
903 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
907 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
910 param
->block
= block
;
911 param
->offset
= offset
;
914 static int compress_page_with_multi_thread(RAMState
*rs
, QEMUFile
*f
,
915 RAMBlock
*block
, ram_addr_t offset
,
916 uint64_t *bytes_transferred
)
918 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
920 thread_count
= migrate_compress_threads();
921 qemu_mutex_lock(&comp_done_lock
);
923 for (idx
= 0; idx
< thread_count
; idx
++) {
924 if (comp_param
[idx
].done
) {
925 comp_param
[idx
].done
= false;
926 bytes_xmit
= qemu_put_qemu_file(f
, comp_param
[idx
].file
);
927 qemu_mutex_lock(&comp_param
[idx
].mutex
);
928 set_compress_params(&comp_param
[idx
], block
, offset
);
929 qemu_cond_signal(&comp_param
[idx
].cond
);
930 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
933 *bytes_transferred
+= bytes_xmit
;
940 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
943 qemu_mutex_unlock(&comp_done_lock
);
949 * ram_save_compressed_page: compress the given page and send it to the stream
951 * Returns the number of pages written.
953 * @rs: current RAM state
954 * @ms: current migration state
955 * @f: QEMUFile where to send the data
956 * @block: block that contains the page we want to send
957 * @offset: offset inside the block for the page
958 * @last_stage: if we are at the completion stage
959 * @bytes_transferred: increase it with the number of transferred bytes
961 static int ram_save_compressed_page(RAMState
*rs
, MigrationState
*ms
,
963 PageSearchStatus
*pss
, bool last_stage
,
964 uint64_t *bytes_transferred
)
967 uint64_t bytes_xmit
= 0;
970 RAMBlock
*block
= pss
->block
;
971 ram_addr_t offset
= pss
->offset
;
973 p
= block
->host
+ offset
;
975 ret
= ram_control_save_page(f
, block
->offset
,
976 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
978 *bytes_transferred
+= bytes_xmit
;
981 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
982 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
983 if (bytes_xmit
> 0) {
985 } else if (bytes_xmit
== 0) {
990 /* When starting the process of a new block, the first page of
991 * the block should be sent out before other pages in the same
992 * block, and all the pages in last block should have been sent
993 * out, keeping this order is important, because the 'cont' flag
994 * is used to avoid resending the block name.
996 if (block
!= rs
->last_sent_block
) {
997 flush_compressed_data(f
);
998 pages
= save_zero_page(rs
, f
, block
, offset
, p
, bytes_transferred
);
1000 /* Make sure the first page is sent out before other pages */
1001 bytes_xmit
= save_page_header(f
, block
, offset
|
1002 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1003 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
1004 migrate_compress_level());
1006 *bytes_transferred
+= bytes_xmit
+ blen
;
1010 qemu_file_set_error(f
, blen
);
1011 error_report("compressed data failed!");
1015 ram_release_pages(ms
, block
->idstr
, pss
->offset
, pages
);
1018 offset
|= RAM_SAVE_FLAG_CONTINUE
;
1019 pages
= save_zero_page(rs
, f
, block
, offset
, p
, bytes_transferred
);
1021 pages
= compress_page_with_multi_thread(rs
, f
, block
, offset
,
1024 ram_release_pages(ms
, block
->idstr
, pss
->offset
, pages
);
1033 * find_dirty_block: find the next dirty page and update any state
1034 * associated with the search process.
1036 * Returns if a page is found
1038 * @rs: current RAM state
1039 * @f: QEMUFile where to send the data
1040 * @pss: data about the state of the current dirty page scan
1041 * @again: set to false if the search has scanned the whole of RAM
1042 * @ram_addr_abs: pointer into which to store the address of the dirty page
1043 * within the global ram_addr space
1045 static bool find_dirty_block(RAMState
*rs
, QEMUFile
*f
, PageSearchStatus
*pss
,
1046 bool *again
, ram_addr_t
*ram_addr_abs
)
1048 pss
->offset
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->offset
,
1050 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1051 pss
->offset
>= rs
->last_offset
) {
1053 * We've been once around the RAM and haven't found anything.
1059 if (pss
->offset
>= pss
->block
->used_length
) {
1060 /* Didn't find anything in this RAM Block */
1062 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1064 /* Hit the end of the list */
1065 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1066 /* Flag that we've looped */
1067 pss
->complete_round
= true;
1068 rs
->ram_bulk_stage
= false;
1069 if (migrate_use_xbzrle()) {
1070 /* If xbzrle is on, stop using the data compression at this
1071 * point. In theory, xbzrle can do better than compression.
1073 flush_compressed_data(f
);
1074 compression_switch
= false;
1077 /* Didn't find anything this time, but try again on the new block */
1081 /* Can go around again, but... */
1083 /* We've found something so probably don't need to */
 * unqueue_page: gets a page off the queue
1091 * Helper for 'get_queued_page' - gets a page off the queue
1093 * Returns the block of the page (or NULL if none available)
1095 * @ms: current migration state
1096 * @offset: used to return the offset within the RAMBlock
1097 * @ram_addr_abs: pointer into which to store the address of the dirty page
1098 * within the global ram_addr space
1100 static RAMBlock
*unqueue_page(MigrationState
*ms
, ram_addr_t
*offset
,
1101 ram_addr_t
*ram_addr_abs
)
1103 RAMBlock
*block
= NULL
;
1105 qemu_mutex_lock(&ms
->src_page_req_mutex
);
1106 if (!QSIMPLEQ_EMPTY(&ms
->src_page_requests
)) {
1107 struct MigrationSrcPageRequest
*entry
=
1108 QSIMPLEQ_FIRST(&ms
->src_page_requests
);
1110 *offset
= entry
->offset
;
1111 *ram_addr_abs
= (entry
->offset
+ entry
->rb
->offset
) &
1114 if (entry
->len
> TARGET_PAGE_SIZE
) {
1115 entry
->len
-= TARGET_PAGE_SIZE
;
1116 entry
->offset
+= TARGET_PAGE_SIZE
;
1118 memory_region_unref(block
->mr
);
1119 QSIMPLEQ_REMOVE_HEAD(&ms
->src_page_requests
, next_req
);
1123 qemu_mutex_unlock(&ms
->src_page_req_mutex
);
 * get_queued_page: unqueue a page from the postcopy requests
1131 * Skips pages that are already sent (!dirty)
1133 * Returns if a queued page is found
1135 * @rs: current RAM state
1136 * @ms: current migration state
1137 * @pss: data about the state of the current dirty page scan
1138 * @ram_addr_abs: pointer into which to store the address of the dirty page
1139 * within the global ram_addr space
1141 static bool get_queued_page(RAMState
*rs
, MigrationState
*ms
,
1142 PageSearchStatus
*pss
,
1143 ram_addr_t
*ram_addr_abs
)
1150 block
= unqueue_page(ms
, &offset
, ram_addr_abs
);
1152 * We're sending this page, and since it's postcopy nothing else
1153 * will dirty it, and we must make sure it doesn't get sent again
1154 * even if this queue request was received after the background
1155 * search already sent it.
1158 unsigned long *bitmap
;
1159 bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1160 dirty
= test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
, bitmap
);
1162 trace_get_queued_page_not_dirty(
1163 block
->idstr
, (uint64_t)offset
,
1164 (uint64_t)*ram_addr_abs
,
1165 test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
,
1166 atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
));
1168 trace_get_queued_page(block
->idstr
,
1170 (uint64_t)*ram_addr_abs
);
1174 } while (block
&& !dirty
);
1178 * As soon as we start servicing pages out of order, then we have
1179 * to kill the bulk stage, since the bulk stage assumes
1180 * in (migration_bitmap_find_and_reset_dirty) that every page is
1181 * dirty, that's no longer true.
1183 rs
->ram_bulk_stage
= false;
1186 * We want the background search to continue from the queued page
1187 * since the guest is likely to want other pages near to the page
1188 * it just requested.
1191 pss
->offset
= offset
;
1198 * migration_page_queue_free: drop any remaining pages in the ram
1201 * It should be empty at the end anyway, but in error cases there may
 * be some left. In case there is any page left, we drop it.
1204 * @ms: current migration state
1206 void migration_page_queue_free(MigrationState
*ms
)
1208 struct MigrationSrcPageRequest
*mspr
, *next_mspr
;
1209 /* This queue generally should be empty - but in the case of a failed
1210 * migration might have some droppings in.
1213 QSIMPLEQ_FOREACH_SAFE(mspr
, &ms
->src_page_requests
, next_req
, next_mspr
) {
1214 memory_region_unref(mspr
->rb
->mr
);
1215 QSIMPLEQ_REMOVE_HEAD(&ms
->src_page_requests
, next_req
);
1222 * ram_save_queue_pages: queue the page for transmission
1224 * A request from postcopy destination for example.
1226 * Returns zero on success or negative on error
1228 * @ms: current migration state
1229 * @rbname: Name of the RAMBLock of the request. NULL means the
1230 * same that last one.
1231 * @start: starting address from the start of the RAMBlock
1232 * @len: length (in bytes) to send
1234 int ram_save_queue_pages(MigrationState
*ms
, const char *rbname
,
1235 ram_addr_t start
, ram_addr_t len
)
1239 ms
->postcopy_requests
++;
1242 /* Reuse last RAMBlock */
1243 ramblock
= ms
->last_req_rb
;
1247 * Shouldn't happen, we can't reuse the last RAMBlock if
1248 * it's the 1st request.
1250 error_report("ram_save_queue_pages no previous block");
1254 ramblock
= qemu_ram_block_by_name(rbname
);
1257 /* We shouldn't be asked for a non-existent RAMBlock */
1258 error_report("ram_save_queue_pages no block '%s'", rbname
);
1261 ms
->last_req_rb
= ramblock
;
1263 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1264 if (start
+len
> ramblock
->used_length
) {
1265 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1266 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1267 __func__
, start
, len
, ramblock
->used_length
);
1271 struct MigrationSrcPageRequest
*new_entry
=
1272 g_malloc0(sizeof(struct MigrationSrcPageRequest
));
1273 new_entry
->rb
= ramblock
;
1274 new_entry
->offset
= start
;
1275 new_entry
->len
= len
;
1277 memory_region_ref(ramblock
->mr
);
1278 qemu_mutex_lock(&ms
->src_page_req_mutex
);
1279 QSIMPLEQ_INSERT_TAIL(&ms
->src_page_requests
, new_entry
, next_req
);
1280 qemu_mutex_unlock(&ms
->src_page_req_mutex
);
1291 * ram_save_target_page: save one target page
1293 * Returns the number of pages written
1295 * @rs: current RAM state
1296 * @ms: current migration state
1297 * @f: QEMUFile where to send the data
1298 * @pss: data about the page we want to send
1299 * @last_stage: if we are at the completion stage
1300 * @bytes_transferred: increase it with the number of transferred bytes
1301 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
1303 static int ram_save_target_page(RAMState
*rs
, MigrationState
*ms
, QEMUFile
*f
,
1304 PageSearchStatus
*pss
,
1306 uint64_t *bytes_transferred
,
1307 ram_addr_t dirty_ram_abs
)
    /* Check if the page is dirty and if it is, send it */
1312 if (migration_bitmap_clear_dirty(dirty_ram_abs
)) {
1313 unsigned long *unsentmap
;
1314 if (compression_switch
&& migrate_use_compression()) {
1315 res
= ram_save_compressed_page(rs
, ms
, f
, pss
,
1319 res
= ram_save_page(rs
, ms
, f
, pss
, last_stage
,
1326 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1328 clear_bit(dirty_ram_abs
>> TARGET_PAGE_BITS
, unsentmap
);
1330 /* Only update last_sent_block if a block was actually sent; xbzrle
1331 * might have decided the page was identical so didn't bother writing
1335 rs
->last_sent_block
= pss
->block
;
1343 * ram_save_host_page: save a whole host page
1345 * Starting at *offset send pages up to the end of the current host
1346 * page. It's valid for the initial offset to point into the middle of
1347 * a host page in which case the remainder of the hostpage is sent.
1348 * Only dirty target pages are sent. Note that the host page size may
1349 * be a huge page for this block.
1351 * Returns the number of pages written or negative on error
1353 * @rs: current RAM state
1354 * @ms: current migration state
1355 * @f: QEMUFile where to send the data
1356 * @pss: data about the page we want to send
1357 * @last_stage: if we are at the completion stage
1358 * @bytes_transferred: increase it with the number of transferred bytes
1359 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1361 static int ram_save_host_page(RAMState
*rs
, MigrationState
*ms
, QEMUFile
*f
,
1362 PageSearchStatus
*pss
,
1364 uint64_t *bytes_transferred
,
1365 ram_addr_t dirty_ram_abs
)
1367 int tmppages
, pages
= 0;
1368 size_t pagesize
= qemu_ram_pagesize(pss
->block
);
1371 tmppages
= ram_save_target_page(rs
, ms
, f
, pss
, last_stage
,
1372 bytes_transferred
, dirty_ram_abs
);
1378 pss
->offset
+= TARGET_PAGE_SIZE
;
1379 dirty_ram_abs
+= TARGET_PAGE_SIZE
;
1380 } while (pss
->offset
& (pagesize
- 1));
1382 /* The offset we leave with is the last one we looked at */
1383 pss
->offset
-= TARGET_PAGE_SIZE
;
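
/*
 * Illustrative sketch, hypothetical sizes, not part of the original file:
 * with a 2 MiB huge-page RAMBlock and 4 KiB target pages the loop in
 * ram_save_host_page() above visits 512 target pages, stopping once
 * pss->offset crosses the host-page boundary (the pagesize - 1 mask).
 */
#if 0
static void example_host_page_walk(void)
{
    const size_t target_page = 4 * 1024;
    const size_t host_page = 2 * 1024 * 1024;
    size_t offset = 0;
    int visited = 0;

    do {
        visited++;
        offset += target_page;
    } while (offset & (host_page - 1));

    /* visited == 512 for these sizes */
}
#endif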
1388 * ram_find_and_save_block: finds a dirty page and sends it to f
1390 * Called within an RCU critical section.
1392 * Returns the number of pages written where zero means no dirty pages
1394 * @rs: current RAM state
1395 * @f: QEMUFile where to send the data
1396 * @last_stage: if we are at the completion stage
1397 * @bytes_transferred: increase it with the number of transferred bytes
1399 * On systems where host-page-size > target-page-size it will send all the
1400 * pages in a host page that are dirty.
1403 static int ram_find_and_save_block(RAMState
*rs
, QEMUFile
*f
, bool last_stage
,
1404 uint64_t *bytes_transferred
)
1406 PageSearchStatus pss
;
1407 MigrationState
*ms
= migrate_get_current();
1410 ram_addr_t dirty_ram_abs
; /* Address of the start of the dirty page in
1413 /* No dirty page as there is zero RAM */
1414 if (!ram_bytes_total()) {
1418 pss
.block
= rs
->last_seen_block
;
1419 pss
.offset
= rs
->last_offset
;
1420 pss
.complete_round
= false;
1423 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1428 found
= get_queued_page(rs
, ms
, &pss
, &dirty_ram_abs
);
1431 /* priority queue empty, so just search for something dirty */
1432 found
= find_dirty_block(rs
, f
, &pss
, &again
, &dirty_ram_abs
);
1436 pages
= ram_save_host_page(rs
, ms
, f
, &pss
,
1437 last_stage
, bytes_transferred
,
1440 } while (!pages
&& again
);
1442 rs
->last_seen_block
= pss
.block
;
1443 rs
->last_offset
= pss
.offset
;
1448 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1450 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1451 RAMState
*rs
= &ram_state
;
1454 rs
->zero_pages
+= pages
;
1456 rs
->norm_pages
+= pages
;
1457 bytes_transferred
+= size
;
1458 qemu_update_position(f
, size
);
1462 static ram_addr_t
ram_save_remaining(void)
1464 return migration_dirty_pages
;
1467 uint64_t ram_bytes_remaining(void)
1469 return ram_save_remaining() * TARGET_PAGE_SIZE
;
1472 uint64_t ram_bytes_transferred(void)
1474 return bytes_transferred
;
1477 uint64_t ram_bytes_total(void)
1483 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
)
1484 total
+= block
->used_length
;
1489 void free_xbzrle_decoded_buf(void)
1491 g_free(xbzrle_decoded_buf
);
1492 xbzrle_decoded_buf
= NULL
;
1495 static void migration_bitmap_free(struct BitmapRcu
*bmap
)
1498 g_free(bmap
->unsentmap
);
1502 static void ram_migration_cleanup(void *opaque
)
    /* caller must hold the iothread lock or be in a bh, so there is
     * no writing race against this migration_bitmap
     */
1507 struct BitmapRcu
*bitmap
= migration_bitmap_rcu
;
1508 atomic_rcu_set(&migration_bitmap_rcu
, NULL
);
1510 memory_global_dirty_log_stop();
1511 call_rcu(bitmap
, migration_bitmap_free
, rcu
);
1514 XBZRLE_cache_lock();
1516 cache_fini(XBZRLE
.cache
);
1517 g_free(XBZRLE
.encoded_buf
);
1518 g_free(XBZRLE
.current_buf
);
1519 g_free(ZERO_TARGET_PAGE
);
1520 XBZRLE
.cache
= NULL
;
1521 XBZRLE
.encoded_buf
= NULL
;
1522 XBZRLE
.current_buf
= NULL
;
1524 XBZRLE_cache_unlock();
1527 static void ram_state_reset(RAMState
*rs
)
1529 rs
->last_seen_block
= NULL
;
1530 rs
->last_sent_block
= NULL
;
1531 rs
->last_offset
= 0;
1532 rs
->last_version
= ram_list
.version
;
1533 rs
->ram_bulk_stage
= true;
1536 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1538 void migration_bitmap_extend(ram_addr_t old
, ram_addr_t
new)
1540 /* called in qemu main thread, so there is
1541 * no writing race against this migration_bitmap
1543 if (migration_bitmap_rcu
) {
1544 struct BitmapRcu
*old_bitmap
= migration_bitmap_rcu
, *bitmap
;
1545 bitmap
= g_new(struct BitmapRcu
, 1);
1546 bitmap
->bmap
= bitmap_new(new);
        /* prevent migration_bitmap content from being set bit
         * by migration_bitmap_sync_range() at the same time.
         * it is safe for migration if a migration_bitmap bit is cleared
         * at the same time.
         */
1553 qemu_mutex_lock(&migration_bitmap_mutex
);
1554 bitmap_copy(bitmap
->bmap
, old_bitmap
->bmap
, old
);
1555 bitmap_set(bitmap
->bmap
, old
, new - old
);
1557 /* We don't have a way to safely extend the sentmap
1558 * with RCU; so mark it as missing, entry to postcopy
1561 bitmap
->unsentmap
= NULL
;
1563 atomic_rcu_set(&migration_bitmap_rcu
, bitmap
);
1564 qemu_mutex_unlock(&migration_bitmap_mutex
);
1565 migration_dirty_pages
+= new - old
;
1566 call_rcu(old_bitmap
, migration_bitmap_free
, rcu
);
1571 * 'expected' is the value you expect the bitmap mostly to be full
1572 * of; it won't bother printing lines that are all this value.
1573 * If 'todump' is null the migration bitmap is dumped.
1575 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
)
1577 int64_t ram_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
1580 int64_t linelen
= 128;
1584 todump
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1587 for (cur
= 0; cur
< ram_pages
; cur
+= linelen
) {
1591 * Last line; catch the case where the line length
1592 * is longer than remaining ram
1594 if (cur
+ linelen
> ram_pages
) {
1595 linelen
= ram_pages
- cur
;
1597 for (curb
= 0; curb
< linelen
; curb
++) {
1598 bool thisbit
= test_bit(cur
+ curb
, todump
);
1599 linebuf
[curb
] = thisbit
? '1' : '.';
1600 found
= found
|| (thisbit
!= expected
);
1603 linebuf
[curb
] = '\0';
1604 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
1609 /* **** functions for postcopy ***** */
1611 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1613 struct RAMBlock
*block
;
1614 unsigned long *bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1616 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1617 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1618 unsigned long range
= first
+ (block
->used_length
>> TARGET_PAGE_BITS
);
1619 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, first
);
1621 while (run_start
< range
) {
1622 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1623 ram_discard_range(NULL
, block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1624 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1625 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1631 * postcopy_send_discard_bm_ram: discard a RAMBlock
1633 * Returns zero on success
1635 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1636 * Note: At this point the 'unsentmap' is the processed bitmap combined
1637 * with the dirtymap; so a '1' means it's either dirty or unsent.
1639 * @ms: current migration state
1640 * @pds: state for postcopy
1641 * @start: RAMBlock starting page
1642 * @length: RAMBlock size
1644 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1645 PostcopyDiscardState
*pds
,
1646 unsigned long start
,
1647 unsigned long length
)
1649 unsigned long end
= start
+ length
; /* one after the end */
1650 unsigned long current
;
1651 unsigned long *unsentmap
;
1653 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1654 for (current
= start
; current
< end
; ) {
1655 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1658 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1659 unsigned long discard_length
;
1662 discard_length
= end
- one
;
1664 discard_length
= zero
- one
;
1666 if (discard_length
) {
1667 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1669 current
= one
+ discard_length
;
1679 * postcopy_each_ram_send_discard: discard all RAMBlocks
1681 * Returns 0 for success or negative for error
1683 * Utility for the outgoing postcopy code.
1684 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1685 * passing it bitmap indexes and name.
1686 * (qemu_ram_foreach_block ends up passing unscaled lengths
1687 * which would mean postcopy code would have to deal with target page)
1689 * @ms: current migration state
1691 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1693 struct RAMBlock
*block
;
1696 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1697 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1698 PostcopyDiscardState
*pds
= postcopy_discard_send_init(ms
,
1703 * Postcopy sends chunks of bitmap over the wire, but it
1704 * just needs indexes at this point, avoids it having
1705 * target page specific code.
1707 ret
= postcopy_send_discard_bm_ram(ms
, pds
, first
,
1708 block
->used_length
>> TARGET_PAGE_BITS
);
1709 postcopy_discard_send_finish(ms
, pds
);
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1721 * Helper for postcopy_chunk_hostpages; it's called twice to
1722 * canonicalize the two bitmaps, that are similar, but one is
1725 * Postcopy requires that all target pages in a hostpage are dirty or
1726 * clean, not a mix. This function canonicalizes the bitmaps.
1728 * @ms: current migration state
1729 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1730 * otherwise we need to canonicalize partially dirty host pages
1731 * @block: block that contains the page we want to canonicalize
1732 * @pds: state for postcopy
1734 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1736 PostcopyDiscardState
*pds
)
1738 unsigned long *bitmap
;
1739 unsigned long *unsentmap
;
1740 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1741 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1742 unsigned long len
= block
->used_length
>> TARGET_PAGE_BITS
;
1743 unsigned long last
= first
+ (len
- 1);
1744 unsigned long run_start
;
1746 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1747 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1751 bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1752 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1755 /* Find a sent page */
1756 run_start
= find_next_zero_bit(unsentmap
, last
+ 1, first
);
1758 /* Find a dirty page */
1759 run_start
= find_next_bit(bitmap
, last
+ 1, first
);
1762 while (run_start
<= last
) {
1763 bool do_fixup
= false;
1764 unsigned long fixup_start_addr
;
1765 unsigned long host_offset
;
1768 * If the start of this run of pages is in the middle of a host
1769 * page, then we need to fixup this host page.
1771 host_offset
= run_start
% host_ratio
;
1774 run_start
-= host_offset
;
1775 fixup_start_addr
= run_start
;
1776 /* For the next pass */
1777 run_start
= run_start
+ host_ratio
;
1779 /* Find the end of this run */
1780 unsigned long run_end
;
1782 run_end
= find_next_bit(unsentmap
, last
+ 1, run_start
+ 1);
1784 run_end
= find_next_zero_bit(bitmap
, last
+ 1, run_start
+ 1);
1787 * If the end isn't at the start of a host page, then the
1788 * run doesn't finish at the end of a host page
1789 * and we need to discard.
1791 host_offset
= run_end
% host_ratio
;
1794 fixup_start_addr
= run_end
- host_offset
;
1796 * This host page has gone, the next loop iteration starts
1797 * from after the fixup
1799 run_start
= fixup_start_addr
+ host_ratio
;
1802 * No discards on this iteration, next loop starts from
1803 * next sent/dirty page
1805 run_start
= run_end
+ 1;
1812 /* Tell the destination to discard this page */
1813 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1814 /* For the unsent_pass we:
1815 * discard partially sent pages
1816 * For the !unsent_pass (dirty) we:
1817 * discard partially dirty pages that were sent
1818 * (any partially sent pages were already discarded
1819 * by the previous unsent_pass)
1821 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1825 /* Clean up the bitmap */
1826 for (page
= fixup_start_addr
;
1827 page
< fixup_start_addr
+ host_ratio
; page
++) {
1828 /* All pages in this host page are now not sent */
1829 set_bit(page
, unsentmap
);
1832 * Remark them as dirty, updating the count for any pages
1833 * that weren't previously dirty.
1835 migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1840 /* Find the next sent page for the next iteration */
1841 run_start
= find_next_zero_bit(unsentmap
, last
+ 1,
1844 /* Find the next dirty page for the next iteration */
1845 run_start
= find_next_bit(bitmap
, last
+ 1, run_start
);
 * postcopy_chunk_hostpages: discard any partially sent host page
1853 * Utility for the outgoing postcopy code.
1855 * Discard any partially sent host-page size chunks, mark any partially
1856 * dirty host-page size chunks as all dirty. In this case the host-page
1857 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1859 * Returns zero on success
1861 * @ms: current migration state
1863 static int postcopy_chunk_hostpages(MigrationState
*ms
)
1865 RAMState
*rs
= &ram_state
;
1866 struct RAMBlock
*block
;
1868 /* Easiest way to make sure we don't resume in the middle of a host-page */
1869 rs
->last_seen_block
= NULL
;
1870 rs
->last_sent_block
= NULL
;
1871 rs
->last_offset
= 0;
1873 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1874 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1876 PostcopyDiscardState
*pds
=
1877 postcopy_discard_send_init(ms
, first
, block
->idstr
);
1879 /* First pass: Discard all partially sent host pages */
1880 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1882 * Second pass: Ensure that all partially dirty host pages are made
1885 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1887 postcopy_discard_send_finish(ms
, pds
);
1888 } /* ram_list loop */
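
/*
 * Illustrative sketch, made-up numbers, not part of the original file: with
 * 4 KiB target pages inside a 2 MiB host page, host_ratio is 512.  If a run
 * starts at target page 5 of a host page, the passes above round the fixup
 * back to the host-page boundary so a whole host page is always discarded
 * and re-marked dirty, never a fraction of one.
 */
#if 0
static void example_hostpage_fixup(void)
{
    const unsigned int host_ratio = 512;   /* 2 MiB / 4 KiB */
    unsigned long run_start = 1029;        /* global target-page number */
    unsigned long host_offset = run_start % host_ratio;   /* == 5 */

    if (host_offset) {
        run_start -= host_offset;          /* 1024: start of the host page */
    }
    /* run_start now names the first target page of the host page */
}
#endif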
1894 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1896 * Returns zero on success
1898 * Transmit the set of pages to be discarded after precopy to the target
1899 * these are pages that:
1900 * a) Have been previously transmitted but are now dirty again
1901 * b) Pages that have never been transmitted, this ensures that
1902 * any pages on the destination that have been mapped by background
1903 * tasks get discarded (transparent huge pages is the specific concern)
1904 * Hopefully this is pretty sparse
1906 * @ms: current migration state
1908 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1911 unsigned long *bitmap
, *unsentmap
;
1915 /* This should be our last sync, the src is now paused */
1916 migration_bitmap_sync(&ram_state
);
1918 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1920 /* We don't have a safe way to resize the sentmap, so
1921 * if the bitmap was resized it will be NULL at this
1924 error_report("migration ram resized during precopy phase");
1929 /* Deal with TPS != HPS and huge pages */
1930 ret
= postcopy_chunk_hostpages(ms
);
1937 * Update the unsentmap to be unsentmap = unsentmap | dirty
1939 bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1940 bitmap_or(unsentmap
, unsentmap
, bitmap
,
1941 last_ram_offset() >> TARGET_PAGE_BITS
);
1944 trace_ram_postcopy_send_discard_bitmap();
1945 #ifdef DEBUG_POSTCOPY
1946 ram_debug_dump_bitmap(unsentmap
, true);
1949 ret
= postcopy_each_ram_send_discard(ms
);
1956 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1958 * Returns zero on success
1960 * @mis: current migration incoming state
1961 * @rbname: name of the RAMBlock of the request. NULL means the
1962 * same that last one.
1963 * @start: RAMBlock starting page
1964 * @length: RAMBlock size
1966 int ram_discard_range(MigrationIncomingState
*mis
,
1968 uint64_t start
, size_t length
)
1972 trace_ram_discard_range(rbname
, start
, length
);
1975 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
1978 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
1982 ret
= ram_block_discard_range(rb
, start
, length
);
1990 static int ram_save_init_globals(RAMState
*rs
)
1992 int64_t ram_bitmap_pages
; /* Size of bitmap in pages, including gaps */
1994 rs
->dirty_rate_high_cnt
= 0;
1995 rs
->bitmap_sync_count
= 0;
1999 migration_bitmap_sync_init(rs
);
2000 qemu_mutex_init(&migration_bitmap_mutex
);
2002 if (migrate_use_xbzrle()) {
2003 XBZRLE_cache_lock();
2004 ZERO_TARGET_PAGE
= g_malloc0(TARGET_PAGE_SIZE
);
2005 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size() /
2008 if (!XBZRLE
.cache
) {
2009 XBZRLE_cache_unlock();
2010 error_report("Error creating cache");
2013 XBZRLE_cache_unlock();
2015 /* We prefer not to abort if there is no memory */
2016 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
2017 if (!XBZRLE
.encoded_buf
) {
2018 error_report("Error allocating encoded_buf");
2022 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
2023 if (!XBZRLE
.current_buf
) {
2024 error_report("Error allocating current_buf");
2025 g_free(XBZRLE
.encoded_buf
);
2026 XBZRLE
.encoded_buf
= NULL
;
2033 /* For memory_global_dirty_log_start below. */
2034 qemu_mutex_lock_iothread();
2036 qemu_mutex_lock_ramlist();
2038 bytes_transferred
= 0;
2039 ram_state_reset(rs
);
2041 migration_bitmap_rcu
= g_new0(struct BitmapRcu
, 1);
2042 /* Skip setting bitmap if there is no RAM */
2043 if (ram_bytes_total()) {
2044 ram_bitmap_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
2045 migration_bitmap_rcu
->bmap
= bitmap_new(ram_bitmap_pages
);
2046 bitmap_set(migration_bitmap_rcu
->bmap
, 0, ram_bitmap_pages
);
2048 if (migrate_postcopy_ram()) {
2049 migration_bitmap_rcu
->unsentmap
= bitmap_new(ram_bitmap_pages
);
2050 bitmap_set(migration_bitmap_rcu
->unsentmap
, 0, ram_bitmap_pages
);
2055 * Count the total number of pages used by ram blocks not including any
2056 * gaps due to alignment or unplugs.
2058 migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
2060 memory_global_dirty_log_start();
2061 migration_bitmap_sync(rs
);
2062 qemu_mutex_unlock_ramlist();
2063 qemu_mutex_unlock_iothread();
2070 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2071 * long-running RCU critical section. When rcu-reclaims in the code
2072 * start to become numerous it will be necessary to reduce the
2073 * granularity of these critical sections.
2077 * ram_save_setup: Setup RAM for migration
2079 * Returns zero to indicate success and negative for error
2081 * @f: QEMUFile where to send the data
2082 * @opaque: RAMState pointer
2084 static int ram_save_setup(QEMUFile
*f
, void *opaque
)
2086 RAMState
*rs
= opaque
;
2089 /* migration has already setup the bitmap, reuse it. */
2090 if (!migration_in_colo_state()) {
2091 if (ram_save_init_globals(rs
) < 0) {
2098 qemu_put_be64(f
, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
);
2100 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
2101 qemu_put_byte(f
, strlen(block
->idstr
));
2102 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, strlen(block
->idstr
));
2103 qemu_put_be64(f
, block
->used_length
);
2104 if (migrate_postcopy_ram() && block
->page_size
!= qemu_host_page_size
) {
2105 qemu_put_be64(f
, block
->page_size
);
2111 ram_control_before_iterate(f
, RAM_CONTROL_SETUP
);
2112 ram_control_after_iterate(f
, RAM_CONTROL_SETUP
);
2114 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2120 * ram_save_iterate: iterative stage for migration
2122 * Returns zero to indicate success and negative for error
2124 * @f: QEMUFile where to send the data
2125 * @opaque: RAMState pointer
2127 static int ram_save_iterate(QEMUFile
*f
, void *opaque
)
2129 RAMState
*rs
= opaque
;
2136 if (ram_list
.version
!= rs
->last_version
) {
2137 ram_state_reset(rs
);
2140 /* Read version before ram_list.blocks */
2143 ram_control_before_iterate(f
, RAM_CONTROL_ROUND
);
2145 t0
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2147 while ((ret
= qemu_file_rate_limit(f
)) == 0) {
2150 pages
= ram_find_and_save_block(rs
, f
, false, &bytes_transferred
);
        /* no more pages to send */
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_clock_get_ns() is a bit expensive, so we only check each some
           iterations
        */
2163 if ((i
& 63) == 0) {
2164 uint64_t t1
= (qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - t0
) / 1000000;
2165 if (t1
> MAX_WAIT
) {
2166 trace_ram_save_iterate_big_wait(t1
, i
);
2172 flush_compressed_data(f
);
2176 * Must occur before EOS (or any QEMUFile operation)
2177 * because of RDMA protocol.
2179 ram_control_after_iterate(f
, RAM_CONTROL_ROUND
);
2181 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2182 bytes_transferred
+= 8;
2184 ret
= qemu_file_get_error(f
);
2193 * ram_save_complete: function called to send the remaining amount of ram
2195 * Returns zero to indicate success
2197 * Called with iothread lock
2199 * @f: QEMUFile where to send the data
2200 * @opaque: RAMState pointer
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState *rs = opaque;

    rcu_read_lock();

    if (!migration_in_postcopy(migrate_get_current())) {
        migration_bitmap_sync(rs);
    }

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(rs, f, !migration_in_colo_state(),
                                        &bytes_transferred);
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(f);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
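/*
 * Unlike the iterative stage, the completion stage ignores the rate limit and
 * loops until ram_find_and_save_block() reports no pages left.  The bitmap
 * sync is skipped when postcopy is already running, presumably because the
 * remaining pages are pulled on demand by the postcopy machinery rather than
 * pushed here.
 */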
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *non_postcopiable_pending,
                             uint64_t *postcopiable_pending)
{
    RAMState *rs = opaque;
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy(migrate_get_current()) &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }

    /* We can do postcopy, and all the data is postcopiable */
    *postcopiable_pending += remaining_size;
}
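/*
 * The pending estimate is simply the number of remaining dirty pages times
 * TARGET_PAGE_SIZE.  When that drops below max_size the dirty bitmap is
 * re-synced (under the iothread lock) to refine the estimate before the
 * caller decides whether to enter the completion stage.
 */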
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }
    loaded_data = xbzrle_decoded_buf;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }

    /* load data and decode */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
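/*
 * On the wire an XBZRLE page is a one byte header (ENCODING_FLAG_XBZRLE), a
 * be16 length, then the encoded data.  The decoder applies that delta on top
 * of whatever is already at @host, which must therefore still hold the
 * previously received version of the page.
 */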
/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }

        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    return block;
}
static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
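/*
 * Writing zeroes into a page that already reads as zero would needlessly
 * dirty (and, for untouched memory, allocate) it on the destination, so the
 * memset is skipped when @ch is zero and the range is already zero.
 */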
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;
            /* uncompress() can fail, especially when the page was dirtied
             * while it was being compressed.  That is not a problem because
             * the dirty page will be retransferred and uncompress() won't
             * break the data in other pages.
             */
            uncompress((Bytef *)des, &pagesize,
                       (const Bytef *)param->compbuf, len);

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
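/*
 * Hand-off protocol for the decompress workers: each thread sleeps on its own
 * param->cond until decompress_data_with_multi_threads() queues a buffer,
 * decompresses it straight into guest memory (param->des), then marks itself
 * idle with param->done and signals the shared decomp_done_cond under
 * decomp_done_lock.
 */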
static void wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
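/*
 * Dispatch side of the hand-off: scan for an idle worker (done == true), copy
 * the compressed bytes into its compbuf and wake it; if every worker is busy,
 * wait on decomp_done_cond until one of them finishes and retry.
 */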
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative in case of error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram.  postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to receive the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses (possibly smaller) target pages;
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        if (place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block->page_size);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block->page_size);
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    return ret;
}
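/*
 * Summary of the placement logic above: target pages belonging to one host
 * page are accumulated in postcopy_host_page and only installed into guest
 * memory once the final target page of that host page has been read, so the
 * destination only ever sees whole, atomically placed host pages.  If every
 * target page of the host page was zero, the cheaper
 * postcopy_place_page_zero() path is used instead.
 */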
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory must
     * be atomic
     */
    bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);

    return ret;
}
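/*
 * ram_load() above is the precopy-side counterpart of ram_save_*: each chunk
 * starts with a be64 that combines a page-aligned address with flag bits in
 * the low TARGET_PAGE_BITS, and the flag decides whether what follows is a
 * block list (MEM_SIZE), a fill byte (COMPRESS), a raw page (PAGE), zlib data
 * (COMPRESS_PAGE) or an XBZRLE delta, until EOS is seen.
 */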
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
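/*
 * The version number registered here (4) has to match the version_id check
 * at the top of ram_load(); bumping one without the other would make every
 * incoming stream fail with -EINVAL.
 */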