migration/ram.c

   1 /*
   2  * QEMU System Emulator
   3  *
   4  * Copyright (c) 2003-2008 Fabrice Bellard
   5  * Copyright (c) 2011-2015 Red Hat Inc
   6  *
   7  * Authors:
   8  *  Juan Quintela <quintela@redhat.com>
   9  *
  10  * Permission is hereby granted, free of charge, to any person obtaining a copy
  11  * of this software and associated documentation files (the "Software"), to deal
  12  * in the Software without restriction, including without limitation the rights
  13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14  * copies of the Software, and to permit persons to whom the Software is
  15  * furnished to do so, subject to the following conditions:
  16  *
  17  * The above copyright notice and this permission notice shall be included in
  18  * all copies or substantial portions of the Software.
  19  *
  20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26  * THE SOFTWARE.
  27  */
  28 #include "qemu/osdep.h"
  29 #include "qemu-common.h"
  30 #include "cpu.h"
  31 #include <zlib.h>
  32 #include "qapi-event.h"
  33 #include "qemu/cutils.h"
  34 #include "qemu/bitops.h"
  35 #include "qemu/bitmap.h"
  36 #include "qemu/timer.h"
  37 #include "qemu/main-loop.h"
  38 #include "migration/migration.h"
  39 #include "migration/postcopy-ram.h"
  40 #include "exec/address-spaces.h"
  41 #include "migration/page_cache.h"
  42 #include "qemu/error-report.h"
  43 #include "trace.h"
  44 #include "exec/ram_addr.h"
  45 #include "qemu/rcu_queue.h"
  46 #include "migration/colo.h"
  47
/* Number of times the auto-converge check in migration_bitmap_sync() has
 * found that the guest dirtied more bytes than half of what was transferred
 * in the same period.  Reset to 0 each time a throttle step is applied.
 */
static int dirty_rate_high_cnt;

/* Incremented once per migration_bitmap_sync() call.  Also passed as the
 * last argument to the XBZRLE cache_is_cached()/cache_insert() calls and
 * reported through MigrationState.dirty_sync_count.
 */
static uint64_t bitmap_sync_count;
  51
  52 /***********************************************************/
  53 /* ram save/restore */
  54
/* Flags OR'ed into the low bits of the page offset written by
 * save_page_header().
 */
#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
/* Zero page: the header is followed by a single 0 byte (save_zero_page) */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
/* The header is followed by TARGET_PAGE_SIZE bytes of raw page data */
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
/* Page belongs to the last sent block: the block id string is omitted */
#define RAM_SAVE_FLAG_CONTINUE 0x20
/* The header is followed by an XBZRLE encoded page (save_xbzrle_page) */
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
/* The header is followed by compressed page data (do_compress_ram_page) */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
  64
/* Page data that xbzrle_cache_zero_page() inserts into the XBZRLE cache for
 * a page that was sent as all-zero.
 * NOTE(review): presumably a TARGET_PAGE_SIZE buffer of zeros; its
 * allocation is not visible in this part of the file - confirm.
 */
static uint8_t *ZERO_TARGET_PAGE;
  66
  67 static inline bool is_zero_range(uint8_t *p, uint64_t size)
  68 {
  69     return buffer_is_zero(p, size);
  70 }
  71
/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding; xbzrle_encode_buffer() writes the
     * encoded page here before it is put on the wire */
    uint8_t *encoded_buf;
    /* buffer for storing page content; save_xbzrle_page() copies the page
     * here and encodes this copy against the cached one */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    /* Taken and released through XBZRLE_cache_lock()/XBZRLE_cache_unlock() */
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
  86
  87 static void XBZRLE_cache_lock(void)
  88 {
  89     if (migrate_use_xbzrle())
  90         qemu_mutex_lock(&XBZRLE.lock);
  91 }
  92
  93 static void XBZRLE_cache_unlock(void)
  94 {
  95     if (migrate_use_xbzrle())
  96         qemu_mutex_unlock(&XBZRLE.lock);
  97 }
  98
  99 /*
 100  * called from qmp_migrate_set_cache_size in main thread, possibly while
 101  * a migration is in progress.
 102  * A running migration maybe using the cache and might finish during this
 103  * call, hence changes to the cache are protected by XBZRLE.lock().
 104  */
 105 int64_t xbzrle_cache_resize(int64_t new_size)
 106 {
 107     PageCache *new_cache;
 108     int64_t ret;
 109
 110     if (new_size < TARGET_PAGE_SIZE) {
 111         return -1;
 112     }
 113
 114     XBZRLE_cache_lock();
 115
 116     if (XBZRLE.cache != NULL) {
 117         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
 118             goto out_new_size;
 119         }
 120         new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
 121                                         TARGET_PAGE_SIZE);
 122         if (!new_cache) {
 123             error_report("Error creating cache");
 124             ret = -1;
 125             goto out;
 126         }
 127
 128         cache_fini(XBZRLE.cache);
 129         XBZRLE.cache = new_cache;
 130     }
 131
 132 out_new_size:
 133     ret = pow2floor(new_size);
 134 out:
 135     XBZRLE_cache_unlock();
 136     return ret;
 137 }
 138
/* accounting for migration statistics */
typedef struct AccountingInfo {
    /* pages sent as zero pages (also bumped when the control hook reports
     * a page as handled with 0 bytes transmitted) */
    uint64_t dup_pages;
    uint64_t skipped_pages;
    /* pages sent in full, either raw or through the compression threads */
    uint64_t norm_pages;
    uint64_t iterations;
    /* bytes put on the wire for XBZRLE pages, headers included */
    uint64_t xbzrle_bytes;
    /* pages sent XBZRLE encoded */
    uint64_t xbzrle_pages;
    /* lookups that did not find the page in the XBZRLE cache */
    uint64_t xbzrle_cache_miss;
    /* cache misses per iteration over the last sync period, refreshed by
     * migration_bitmap_sync() */
    double xbzrle_cache_miss_rate;
    /* pages whose XBZRLE encoding did not fit (encoder returned -1) */
    uint64_t xbzrle_overflows;
} AccountingInfo;

/* The single set of counters used by this file */
static AccountingInfo acct_info;
 153
 154 static void acct_clear(void)
 155 {
 156     memset(&acct_info, 0, sizeof(acct_info));
 157 }
 158
 159 uint64_t dup_mig_bytes_transferred(void)
 160 {
 161     return acct_info.dup_pages * TARGET_PAGE_SIZE;
 162 }
 163
 164 uint64_t dup_mig_pages_transferred(void)
 165 {
 166     return acct_info.dup_pages;
 167 }
 168
 169 uint64_t skipped_mig_bytes_transferred(void)
 170 {
 171     return acct_info.skipped_pages * TARGET_PAGE_SIZE;
 172 }
 173
 174 uint64_t skipped_mig_pages_transferred(void)
 175 {
 176     return acct_info.skipped_pages;
 177 }
 178
 179 uint64_t norm_mig_bytes_transferred(void)
 180 {
 181     return acct_info.norm_pages * TARGET_PAGE_SIZE;
 182 }
 183
 184 uint64_t norm_mig_pages_transferred(void)
 185 {
 186     return acct_info.norm_pages;
 187 }
 188
 189 uint64_t xbzrle_mig_bytes_transferred(void)
 190 {
 191     return acct_info.xbzrle_bytes;
 192 }
 193
 194 uint64_t xbzrle_mig_pages_transferred(void)
 195 {
 196     return acct_info.xbzrle_pages;
 197 }
 198
 199 uint64_t xbzrle_mig_pages_cache_miss(void)
 200 {
 201     return acct_info.xbzrle_cache_miss;
 202 }
 203
 204 double xbzrle_mig_cache_miss_rate(void)
 205 {
 206     return acct_info.xbzrle_cache_miss_rate;
 207 }
 208
 209 uint64_t xbzrle_mig_pages_overflow(void)
 210 {
 211     return acct_info.xbzrle_overflows;
 212 }
 213
/* This is the last block that we have visited searching for dirty pages
 */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data; ram_save_page()
 * sets RAM_SAVE_FLAG_CONTINUE when the page to send is in this block */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
/* Held by migration_bitmap_sync() while merging the dirty log into the
 * migration bitmap */
static QemuMutex migration_bitmap_mutex;
/* Running count of dirty pages: increased by migration_bitmap_sync_range(),
 * decreased by migration_bitmap_clear_dirty() */
static uint64_t migration_dirty_pages;
static uint32_t last_version;
/* While set, migration_bitmap_find_dirty() advances page by page without
 * consulting the bitmap, and the XBZRLE paths are bypassed */
static bool ram_bulk_stage;
 224
/* used by the search for pages to send; ram_save_page() takes the page to
 * transmit from the block/offset pair stored here */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;
 235
/* Migration bitmaps, published through the migration_bitmap_rcu pointer.
 * Readers in this file fetch the pointer with atomic_rcu_read().
 */
static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap; one bit per target page, indexed by
     * (ram address >> TARGET_PAGE_BITS) */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;
 247
/* Per-thread state of one compression worker (see do_data_compress) */
struct CompressParam {
    /* Set to true by the worker, under comp_done_lock, when it has finished
     * a page; cleared by the migration thread when it hands out new work.
     * Starts out true. */
    bool done;
    /* Set under mutex by terminate_compression_threads() to stop the worker */
    bool quit;
    /* Dummy QEMUFile the worker writes the compressed page into; drained
     * into the real stream with qemu_put_qemu_file() */
    QEMUFile *file;
    /* mutex protects block, offset and quit; cond wakes the worker */
    QemuMutex mutex;
    QemuCond cond;
    /* Pending job: block is NULL when there is nothing to compress */
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;
 258
/* Per-thread state of one decompression worker.
 * NOTE(review): none of these fields is exercised in the part of the file
 * visible here; presumably this mirrors CompressParam for the receiving
 * side - confirm against the decompression code.
 */
struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;
 269
/* Arrays with one entry per compression thread; allocated in
 * migrate_compress_threads_create() and freed (and reset to NULL) in
 * migrate_compress_threads_join().
 */
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

/* Set to true by migrate_compress_threads_create() */
static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;

/* Defined further down; needed here by do_data_compress() */
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);
 289
 290 static void *do_data_compress(void *opaque)
 291 {
 292     CompressParam *param = opaque;
 293     RAMBlock *block;
 294     ram_addr_t offset;
 295
 296     qemu_mutex_lock(&param->mutex);
 297     while (!param->quit) {
 298         if (param->block) {
 299             block = param->block;
 300             offset = param->offset;
 301             param->block = NULL;
 302             qemu_mutex_unlock(&param->mutex);
 303
 304             do_compress_ram_page(param->file, block, offset);
 305
 306             qemu_mutex_lock(&comp_done_lock);
 307             param->done = true;
 308             qemu_cond_signal(&comp_done_cond);
 309             qemu_mutex_unlock(&comp_done_lock);
 310
 311             qemu_mutex_lock(&param->mutex);
 312         } else {
 313             qemu_cond_wait(&param->cond, &param->mutex);
 314         }
 315     }
 316     qemu_mutex_unlock(&param->mutex);
 317
 318     return NULL;
 319 }
 320
 321 static inline void terminate_compression_threads(void)
 322 {
 323     int idx, thread_count;
 324
 325     thread_count = migrate_compress_threads();
 326     for (idx = 0; idx < thread_count; idx++) {
 327         qemu_mutex_lock(&comp_param[idx].mutex);
 328         comp_param[idx].quit = true;
 329         qemu_cond_signal(&comp_param[idx].cond);
 330         qemu_mutex_unlock(&comp_param[idx].mutex);
 331     }
 332 }
 333
 334 void migrate_compress_threads_join(void)
 335 {
 336     int i, thread_count;
 337
 338     if (!migrate_use_compression()) {
 339         return;
 340     }
 341     terminate_compression_threads();
 342     thread_count = migrate_compress_threads();
 343     for (i = 0; i < thread_count; i++) {
 344         qemu_thread_join(compress_threads + i);
 345         qemu_fclose(comp_param[i].file);
 346         qemu_mutex_destroy(&comp_param[i].mutex);
 347         qemu_cond_destroy(&comp_param[i].cond);
 348     }
 349     qemu_mutex_destroy(&comp_done_lock);
 350     qemu_cond_destroy(&comp_done_cond);
 351     g_free(compress_threads);
 352     g_free(comp_param);
 353     compress_threads = NULL;
 354     comp_param = NULL;
 355 }
 356
 357 void migrate_compress_threads_create(void)
 358 {
 359     int i, thread_count;
 360
 361     if (!migrate_use_compression()) {
 362         return;
 363     }
 364     compression_switch = true;
 365     thread_count = migrate_compress_threads();
 366     compress_threads = g_new0(QemuThread, thread_count);
 367     comp_param = g_new0(CompressParam, thread_count);
 368     qemu_cond_init(&comp_done_cond);
 369     qemu_mutex_init(&comp_done_lock);
 370     for (i = 0; i < thread_count; i++) {
 371         /* comp_param[i].file is just used as a dummy buffer to save data,
 372          * set its ops to empty.
 373          */
 374         comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
 375         comp_param[i].done = true;
 376         comp_param[i].quit = false;
 377         qemu_mutex_init(&comp_param[i].mutex);
 378         qemu_cond_init(&comp_param[i].cond);
 379         qemu_thread_create(compress_threads + i, "compress",
 380                            do_data_compress, comp_param + i,
 381                            QEMU_THREAD_JOINABLE);
 382     }
 383 }
 384
 385 /**
 386  * save_page_header: Write page header to wire
 387  *
 388  * If this is the 1st block, it also writes the block identification
 389  *
 390  * Returns: Number of bytes written
 391  *
 392  * @f: QEMUFile where to send the data
 393  * @block: block that contains the page we want to send
 394  * @offset: offset inside the block for the page
 395  *          in the lower bits, it contains flags
 396  */
 397 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
 398 {
 399     size_t size, len;
 400
 401     qemu_put_be64(f, offset);
 402     size = 8;
 403
 404     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
 405         len = strlen(block->idstr);
 406         qemu_put_byte(f, len);
 407         qemu_put_buffer(f, (uint8_t *)block->idstr, len);
 408         size += 1 + len;
 409     }
 410     return size;
 411 }
 412
 413 /* Reduce amount of guest cpu execution to hopefully slow down memory writes.
 414  * If guest dirty memory rate is reduced below the rate at which we can
 415  * transfer pages to the destination then we should be able to complete
 416  * migration. Some workloads dirty memory way too fast and will not effectively
 417  * converge, even with auto-converge.
 418  */
 419 static void mig_throttle_guest_down(void)
 420 {
 421     MigrationState *s = migrate_get_current();
 422     uint64_t pct_initial = s->parameters.cpu_throttle_initial;
 423     uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
 424
 425     /* We have not started throttling yet. Let's start it. */
 426     if (!cpu_throttle_active()) {
 427         cpu_throttle_set(pct_initial);
 428     } else {
 429         /* Throttling already on, just increase the rate */
 430         cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
 431     }
 432 }
 433
 434 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
 435  * The important thing is that a stale (not-yet-0'd) page be replaced
 436  * by the new data.
 437  * As a bonus, if the page wasn't in the cache it gets added so that
 438  * when a small write is made into the 0'd page it gets XBZRLE sent
 439  */
 440 static void xbzrle_cache_zero_page(ram_addr_t current_addr)
 441 {
 442     if (ram_bulk_stage || !migrate_use_xbzrle()) {
 443         return;
 444     }
 445
 446     /* We don't care if this fails to allocate a new cache page
 447      * as long as it updated an old one */
 448     cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
 449                  bitmap_sync_count);
 450 }
 451
/* Encoding byte written right after the page header of an XBZRLE page */
#define ENCODING_FLAG_XBZRLE 0x1

/**
 * save_xbzrle_page: compress and send current page
 *
 * Looks the page up in the XBZRLE cache.  On a miss the page is inserted
 * into the cache (unless this is the last stage) and -1 is returned so the
 * caller sends it in full.  On a hit the page is encoded against the cached
 * copy and, if the encoding fits, the encoded form is written to @f.
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *             (also returned on a cache miss)
 *
 * @f: QEMUFile where to send the data
 * @current_data: in/out pointer to the page contents; when the page has
 *                been stored in the cache it is redirected to the cached
 *                copy, which is then what the caller sends
 * @current_addr: address of the page, used as the cache key
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        /* The cache is left untouched during the last stage */
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    /* NOTE(review): presumably so that the encoder and the cache update
     * below see one consistent snapshot of a page the guest may still be
     * writing to - confirm */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        /* Nothing to encode: nothing is sent for this page */
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            /* The caller sends the page in full from the cached copy */
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    /* 1 byte for the encoding flag, 2 bytes for the encoded length */
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}
 532
 533 /* Called with rcu_read_lock() to protect migration_bitmap
 534  * rb: The RAMBlock  to search for dirty pages in
 535  * start: Start address (typically so we can continue from previous page)
 536  * ram_addr_abs: Pointer into which to store the address of the dirty page
 537  *               within the global ram_addr space
 538  *
 539  * Returns: byte offset within memory region of the start of a dirty page
 540  */
 541 static inline
 542 ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
 543                                        ram_addr_t start,
 544                                        ram_addr_t *ram_addr_abs)
 545 {
 546     unsigned long base = rb->offset >> TARGET_PAGE_BITS;
 547     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
 548     uint64_t rb_size = rb->used_length;
 549     unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
 550     unsigned long *bitmap;
 551
 552     unsigned long next;
 553
 554     bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
 555     if (ram_bulk_stage && nr > base) {
 556         next = nr + 1;
 557     } else {
 558         next = find_next_bit(bitmap, size, nr);
 559     }
 560
 561     *ram_addr_abs = next << TARGET_PAGE_BITS;
 562     return (next - base) << TARGET_PAGE_BITS;
 563 }
 564
 565 static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
 566 {
 567     bool ret;
 568     int nr = addr >> TARGET_PAGE_BITS;
 569     unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
 570
 571     ret = test_and_clear_bit(nr, bitmap);
 572
 573     if (ret) {
 574         migration_dirty_pages--;
 575     }
 576     return ret;
 577 }
 578
 579 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 580 {
 581     unsigned long *bitmap;
 582     bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
 583     migration_dirty_pages +=
 584         cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
 585 }
 586
/* Fix me: there are too many global variables used in migration process. */
/* State of the current measurement period of migration_bitmap_sync();
 * all of it is reset by migration_bitmap_sync_init().
 */
/* Start of the period, in ms of QEMU_CLOCK_REALTIME; 0 means not started */
static int64_t start_time;
/* ram_bytes_transferred() sampled at the previous auto-converge check */
static int64_t bytes_xfer_prev;
/* Pages found dirty since the period started */
static int64_t num_dirty_pages_period;
/* acct_info.xbzrle_cache_miss / acct_info.iterations at the previous
 * cache miss rate update */
static uint64_t xbzrle_cache_miss_prev;
static uint64_t iterations_prev;
 593
 594 static void migration_bitmap_sync_init(void)
 595 {
 596     start_time = 0;
 597     bytes_xfer_prev = 0;
 598     num_dirty_pages_period = 0;
 599     xbzrle_cache_miss_prev = 0;
 600     iterations_prev = 0;
 601 }
 602
 603 /* Returns a summary bitmap of the page sizes of all RAMBlocks;
 604  * for VMs with just normal pages this is equivalent to the
 605  * host page size.  If it's got some huge pages then it's the OR
 606  * of all the different page sizes.
 607  */
 608 uint64_t ram_pagesize_summary(void)
 609 {
 610     RAMBlock *block;
 611     uint64_t summary = 0;
 612
 613     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 614         summary |= block->page_size;
 615     }
 616
 617     return summary;
 618 }
 619
 620 static void migration_bitmap_sync(void)
 621 {
 622     RAMBlock *block;
 623     uint64_t num_dirty_pages_init = migration_dirty_pages;
 624     MigrationState *s = migrate_get_current();
 625     int64_t end_time;
 626     int64_t bytes_xfer_now;
 627
 628     bitmap_sync_count++;
 629
 630     if (!bytes_xfer_prev) {
 631         bytes_xfer_prev = ram_bytes_transferred();
 632     }
 633
 634     if (!start_time) {
 635         start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 636     }
 637
 638     trace_migration_bitmap_sync_start();
 639     memory_global_dirty_log_sync();
 640
 641     qemu_mutex_lock(&migration_bitmap_mutex);
 642     rcu_read_lock();
 643     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 644         migration_bitmap_sync_range(block->offset, block->used_length);
 645     }
 646     rcu_read_unlock();
 647     qemu_mutex_unlock(&migration_bitmap_mutex);
 648
 649     trace_migration_bitmap_sync_end(migration_dirty_pages
 650                                     - num_dirty_pages_init);
 651     num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
 652     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 653
 654     /* more than 1 second = 1000 millisecons */
 655     if (end_time > start_time + 1000) {
 656         if (migrate_auto_converge()) {
 657             /* The following detection logic can be refined later. For now:
 658                Check to see if the dirtied bytes is 50% more than the approx.
 659                amount of bytes that just got transferred since the last time we
 660                were in this routine. If that happens twice, start or increase
 661                throttling */
 662             bytes_xfer_now = ram_bytes_transferred();
 663
 664             if (s->dirty_pages_rate &&
 665                (num_dirty_pages_period * TARGET_PAGE_SIZE >
 666                    (bytes_xfer_now - bytes_xfer_prev)/2) &&
 667                (dirty_rate_high_cnt++ >= 2)) {
 668                     trace_migration_throttle();
 669                     dirty_rate_high_cnt = 0;
 670                     mig_throttle_guest_down();
 671              }
 672              bytes_xfer_prev = bytes_xfer_now;
 673         }
 674
 675         if (migrate_use_xbzrle()) {
 676             if (iterations_prev != acct_info.iterations) {
 677                 acct_info.xbzrle_cache_miss_rate =
 678                    (double)(acct_info.xbzrle_cache_miss -
 679                             xbzrle_cache_miss_prev) /
 680                    (acct_info.iterations - iterations_prev);
 681             }
 682             iterations_prev = acct_info.iterations;
 683             xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
 684         }
 685         s->dirty_pages_rate = num_dirty_pages_period * 1000
 686             / (end_time - start_time);
 687         s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
 688         start_time = end_time;
 689         num_dirty_pages_period = 0;
 690     }
 691     s->dirty_sync_count = bitmap_sync_count;
 692     if (migrate_use_events()) {
 693         qapi_event_send_migration_pass(bitmap_sync_count, NULL);
 694     }
 695 }
 696
 697 /**
 698  * save_zero_page: Send the zero page to the stream
 699  *
 700  * Returns: Number of pages written.
 701  *
 702  * @f: QEMUFile where to send the data
 703  * @block: block that contains the page we want to send
 704  * @offset: offset inside the block for the page
 705  * @p: pointer to the page
 706  * @bytes_transferred: increase it with the number of transferred bytes
 707  */
 708 static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
 709                           uint8_t *p, uint64_t *bytes_transferred)
 710 {
 711     int pages = -1;
 712
 713     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
 714         acct_info.dup_pages++;
 715         *bytes_transferred += save_page_header(f, block,
 716                                                offset | RAM_SAVE_FLAG_COMPRESS);
 717         qemu_put_byte(f, 0);
 718         *bytes_transferred += 1;
 719         pages = 1;
 720     }
 721
 722     return pages;
 723 }
 724
 725 static void ram_release_pages(MigrationState *ms, const char *block_name,
 726                               uint64_t offset, int pages)
 727 {
 728     if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
 729         return;
 730     }
 731
 732     ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
 733 }
 734
/**
 * ram_save_page: Send the given page to the stream
 *
 * The page is first offered to ram_control_save_page().  When that reports
 * RAM_SAVE_CONTROL_NOT_SUPP the page is sent here: as a zero page if it is
 * all zero, else XBZRLE encoded when XBZRLE applies, else as a raw page.
 * The XBZRLE cache lock is held while the page is processed.
 *
 * Returns: Number of pages written.
 *          < 0 - error
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 *
 * @ms: The current migration state.
 * @f: QEMUFile where to send the data
 * @pss: search status; its block and offset identify the page to send
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
                         bool last_stage, uint64_t *bytes_transferred)
{
    int pages = -1;
    uint64_t bytes_xmit;
    ram_addr_t current_addr;
    uint8_t *p;
    int ret;
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->offset;

    p = block->host + offset;

    /* In doubt sent page as normal */
    bytes_xmit = 0;
    ret = ram_control_save_page(f, block->offset,
                           offset, TARGET_PAGE_SIZE, &bytes_xmit);
    if (bytes_xmit) {
        *bytes_transferred += bytes_xmit;
        pages = 1;
    }

    XBZRLE_cache_lock();

    /* Address of the page in the global ram_addr space: the XBZRLE key */
    current_addr = block->offset + offset;

    /* From here on offset carries the header flags in its low bits;
     * pss->offset keeps the plain offset */
    if (block == last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
        /* Only the counters are updated in this branch.
         * NOTE(review): if bytes_xmit was 0, pages is still -1 here and
         * the raw page path below runs as well - confirm this is intended */
        if (ret != RAM_SAVE_CONTROL_DELAYED) {
            if (bytes_xmit > 0) {
                acct_info.norm_pages++;
            } else if (bytes_xmit == 0) {
                acct_info.dup_pages++;
            }
        }
    } else {
        pages = save_zero_page(f, block, offset, p, bytes_transferred);
        if (pages > 0) {
            /* Must let xbzrle know, otherwise a previous (now 0'd) cached
             * page would be stale
             */
            xbzrle_cache_zero_page(current_addr);
            ram_release_pages(ms, block->idstr, pss->offset, pages);
        } else if (!ram_bulk_stage &&
                   !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
            /* May redirect p to the copy held in the XBZRLE cache */
            pages = save_xbzrle_page(f, &p, current_addr, block,
                                     offset, last_stage, bytes_transferred);
            if (!last_stage) {
                /* Can't send this cached data async, since the cache page
                 * might get updated before it gets to the wire
                 */
                send_async = false;
            }
        }
    }

    /* XBZRLE overflow or normal page */
    if (pages == -1) {
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_PAGE);
        if (send_async) {
            /* Last argument is true only with release-ram in postcopy */
            qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
                                  migrate_release_ram() &
                                  migration_in_postcopy(ms));
        } else {
            qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
        }
        *bytes_transferred += TARGET_PAGE_SIZE;
        pages = 1;
        acct_info.norm_pages++;
    }

    XBZRLE_cache_unlock();

    return pages;
}
 829
 830 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
 831                                 ram_addr_t offset)
 832 {
 833     int bytes_sent, blen;
 834     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
 835
 836     bytes_sent = save_page_header(f, block, offset |
 837                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
 838     blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
 839                                      migrate_compress_level());
 840     if (blen < 0) {
 841         bytes_sent = 0;
 842         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
 843         error_report("compressed data failed!");
 844     } else {
 845         bytes_sent += blen;
 846         ram_release_pages(migrate_get_current(), block->idstr,
 847                           offset & TARGET_PAGE_MASK, 1);
 848     }
 849
 850     return bytes_sent;
 851 }
 852
/* File-wide count of transferred bytes; flush_compressed_data() adds the
 * drained compression output to it.  Several functions in this file take a
 * parameter with the same name, which shadows this variable inside them.
 */
static uint64_t bytes_transferred;
 854
 855 static void flush_compressed_data(QEMUFile *f)
 856 {
 857     int idx, len, thread_count;
 858
 859     if (!migrate_use_compression()) {
 860         return;
 861     }
 862     thread_count = migrate_compress_threads();
 863
 864     qemu_mutex_lock(&comp_done_lock);
 865     for (idx = 0; idx < thread_count; idx++) {
 866         while (!comp_param[idx].done) {
 867             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
 868         }
 869     }
 870     qemu_mutex_unlock(&comp_done_lock);
 871
 872     for (idx = 0; idx < thread_count; idx++) {
 873         qemu_mutex_lock(&comp_param[idx].mutex);
 874         if (!comp_param[idx].quit) {
 875             len = qemu_put_qemu_file(f, comp_param[idx].file);
 876             bytes_transferred += len;
 877         }
 878         qemu_mutex_unlock(&comp_param[idx].mutex);
 879     }
 880 }
 881
 882 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
 883                                        ram_addr_t offset)
 884 {
 885     param->block = block;
 886     param->offset = offset;
 887 }
 888
 889 static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
 890                                            ram_addr_t offset,
 891                                            uint64_t *bytes_transferred)
 892 {
 893     int idx, thread_count, bytes_xmit = -1, pages = -1;
 894
 895     thread_count = migrate_compress_threads();
 896     qemu_mutex_lock(&comp_done_lock);
 897     while (true) {
 898         for (idx = 0; idx < thread_count; idx++) {
 899             if (comp_param[idx].done) {
 900                 comp_param[idx].done = false;
 901                 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
 902                 qemu_mutex_lock(&comp_param[idx].mutex);
 903                 set_compress_params(&comp_param[idx], block, offset);
 904                 qemu_cond_signal(&comp_param[idx].cond);
 905                 qemu_mutex_unlock(&comp_param[idx].mutex);
 906                 pages = 1;
 907                 acct_info.norm_pages++;
 908                 *bytes_transferred += bytes_xmit;
 909                 break;
 910             }
 911         }
 912         if (pages > 0) {
 913             break;
 914         } else {
 915             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
 916         }
 917     }
 918     qemu_mutex_unlock(&comp_done_lock);
 919
 920     return pages;
 921 }
 922
 923 /**
 924  * ram_save_compressed_page: compress the given page and send it to the stream
 925  *
 926  * Returns: Number of pages written.
 927  *
 928  * @ms: The current migration state.
 929  * @f: QEMUFile where to send the data
 930  * @block: block that contains the page we want to send
 931  * @offset: offset inside the block for the page
 932  * @last_stage: if we are at the completion stage
 933  * @bytes_transferred: increase it with the number of transferred bytes
 934  */
 935 static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
 936                                     PageSearchStatus *pss, bool last_stage,
 937                                     uint64_t *bytes_transferred)
 938 {
 939     int pages = -1;
 940     uint64_t bytes_xmit = 0;
 941     uint8_t *p;
 942     int ret, blen;
 943     RAMBlock *block = pss->block;
 944     ram_addr_t offset = pss->offset;
 945
 946     p = block->host + offset;
 947
 948     ret = ram_control_save_page(f, block->offset,
 949                                 offset, TARGET_PAGE_SIZE, &bytes_xmit);
 950     if (bytes_xmit) {
 951         *bytes_transferred += bytes_xmit;
 952         pages = 1;
 953     }
 954     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 955         if (ret != RAM_SAVE_CONTROL_DELAYED) {
 956             if (bytes_xmit > 0) {
 957                 acct_info.norm_pages++;
 958             } else if (bytes_xmit == 0) {
 959                 acct_info.dup_pages++;
 960             }
 961         }
 962     } else {
 963         /* When starting the process of a new block, the first page of
 964          * the block should be sent out before other pages in the same
 965          * block, and all the pages in last block should have been sent
 966          * out, keeping this order is important, because the 'cont' flag
 967          * is used to avoid resending the block name.
 968          */
 969         if (block != last_sent_block) {
 970             flush_compressed_data(f);
 971             pages = save_zero_page(f, block, offset, p, bytes_transferred);
 972             if (pages == -1) {
 973                 /* Make sure the first page is sent out before other pages */
 974                 bytes_xmit = save_page_header(f, block, offset |
 975                                               RAM_SAVE_FLAG_COMPRESS_PAGE);
 976                 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
 977                                                  migrate_compress_level());
 978                 if (blen > 0) {
 979                     *bytes_transferred += bytes_xmit + blen;
 980                     acct_info.norm_pages++;
 981                     pages = 1;
 982                 } else {
 983                     qemu_file_set_error(f, blen);
 984                     error_report("compressed data failed!");
 985                 }
 986             }
 987             if (pages > 0) {
 988                 ram_release_pages(ms, block->idstr, pss->offset, pages);
 989             }
 990         } else {
 991             offset |= RAM_SAVE_FLAG_CONTINUE;
 992             pages = save_zero_page(f, block, offset, p, bytes_transferred);
 993             if (pages == -1) {
 994                 pages = compress_page_with_multi_thread(f, block, offset,
 995                                                         bytes_transferred);
 996             } else {
 997                 ram_release_pages(ms, block->idstr, pss->offset, pages);
 998             }
 999         }
1000     }
1001
1002     return pages;
1003 }
1004
1005 /*
1006  * Find the next dirty page and update any state associated with
1007  * the search process.
1008  *
1009  * Returns: True if a page is found
1010  *
1011  * @f: Current migration stream.
1012  * @pss: Data about the state of the current dirty page scan.
1013  * @*again: Set to false if the search has scanned the whole of RAM
1014  * *ram_addr_abs: Pointer into which to store the address of the dirty page
1015  *               within the global ram_addr space
1016  */
1017 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
1018                              bool *again, ram_addr_t *ram_addr_abs)
1019 {
1020     pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
1021                                               ram_addr_abs);
1022     if (pss->complete_round && pss->block == last_seen_block &&
1023         pss->offset >= last_offset) {
1024         /*
1025          * We've been once around the RAM and haven't found anything.
1026          * Give up.
1027          */
1028         *again = false;
1029         return false;
1030     }
1031     if (pss->offset >= pss->block->used_length) {
1032         /* Didn't find anything in this RAM Block */
1033         pss->offset = 0;
1034         pss->block = QLIST_NEXT_RCU(pss->block, next);
1035         if (!pss->block) {
1036             /* Hit the end of the list */
1037             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1038             /* Flag that we've looped */
1039             pss->complete_round = true;
1040             ram_bulk_stage = false;
1041             if (migrate_use_xbzrle()) {
1042                 /* If xbzrle is on, stop using the data compression at this
1043                  * point. In theory, xbzrle can do better than compression.
1044                  */
1045                 flush_compressed_data(f);
1046                 compression_switch = false;
1047             }
1048         }
1049         /* Didn't find anything this time, but try again on the new block */
1050         *again = true;
1051         return false;
1052     } else {
1053         /* Can go around again, but... */
1054         *again = true;
1055         /* We've found something so probably don't need to */
1056         return true;
1057     }
1058 }
1059
1060 /*
1061  * Helper for 'get_queued_page' - gets a page off the queue
1062  *      ms:      MigrationState in
1063  * *offset:      Used to return the offset within the RAMBlock
1064  * ram_addr_abs: global offset in the dirty/sent bitmaps
1065  *
1066  * Returns:      block (or NULL if none available)
1067  */
1068 static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1069                               ram_addr_t *ram_addr_abs)
1070 {
1071     RAMBlock *block = NULL;
1072
1073     qemu_mutex_lock(&ms->src_page_req_mutex);
1074     if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1075         struct MigrationSrcPageRequest *entry =
1076                                 QSIMPLEQ_FIRST(&ms->src_page_requests);
1077         block = entry->rb;
1078         *offset = entry->offset;
1079         *ram_addr_abs = (entry->offset + entry->rb->offset) &
1080                         TARGET_PAGE_MASK;
1081
1082         if (entry->len > TARGET_PAGE_SIZE) {
1083             entry->len -= TARGET_PAGE_SIZE;
1084             entry->offset += TARGET_PAGE_SIZE;
1085         } else {
1086             memory_region_unref(block->mr);
1087             QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1088             g_free(entry);
1089         }
1090     }
1091     qemu_mutex_unlock(&ms->src_page_req_mutex);
1092
1093     return block;
1094 }
1095
1096 /*
1097  * Unqueue a page from the queue fed by postcopy page requests; skips pages
1098  * that are already sent (!dirty)
1099  *
1100  *      ms:      MigrationState in
1101  *     pss:      PageSearchStatus structure updated with found block/offset
1102  * ram_addr_abs: global offset in the dirty/sent bitmaps
1103  *
1104  * Returns:      true if a queued page is found
1105  */
1106 static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1107                             ram_addr_t *ram_addr_abs)
1108 {
1109     RAMBlock  *block;
1110     ram_addr_t offset;
1111     bool dirty;
1112
1113     do {
1114         block = unqueue_page(ms, &offset, ram_addr_abs);
1115         /*
1116          * We're sending this page, and since it's postcopy nothing else
1117          * will dirty it, and we must make sure it doesn't get sent again
1118          * even if this queue request was received after the background
1119          * search already sent it.
1120          */
1121         if (block) {
1122             unsigned long *bitmap;
1123             bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1124             dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1125             if (!dirty) {
1126                 trace_get_queued_page_not_dirty(
1127                     block->idstr, (uint64_t)offset,
1128                     (uint64_t)*ram_addr_abs,
1129                     test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1130                          atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1131             } else {
1132                 trace_get_queued_page(block->idstr,
1133                                       (uint64_t)offset,
1134                                       (uint64_t)*ram_addr_abs);
1135             }
1136         }
1137
1138     } while (block && !dirty);
1139
1140     if (block) {
1141         /*
1142          * As soon as we start servicing pages out of order, then we have
1143          * to kill the bulk stage, since the bulk stage assumes
1144          * in (migration_bitmap_find_and_reset_dirty) that every page is
1145          * dirty, that's no longer true.
1146          */
1147         ram_bulk_stage = false;
1148
1149         /*
1150          * We want the background search to continue from the queued page
1151          * since the guest is likely to want other pages near to the page
1152          * it just requested.
1153          */
1154         pss->block = block;
1155         pss->offset = offset;
1156     }
1157
1158     return !!block;
1159 }
1160
1161 /**
1162  * flush_page_queue: Flush any remaining pages in the ram request queue
1163  *    it should be empty at the end anyway, but in error cases there may be
1164  *    some left.
1165  *
1166  * ms: MigrationState
1167  */
1168 void flush_page_queue(MigrationState *ms)
1169 {
1170     struct MigrationSrcPageRequest *mspr, *next_mspr;
1171     /* This queue generally should be empty - but in the case of a failed
1172      * migration might have some droppings in.
1173      */
1174     rcu_read_lock();
1175     QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1176         memory_region_unref(mspr->rb->mr);
1177         QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1178         g_free(mspr);
1179     }
1180     rcu_read_unlock();
1181 }
1182
1183 /**
1184  * Queue the pages for transmission, e.g. a request from postcopy destination
1185  *   ms: MigrationStatus in which the queue is held
1186  *   rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1187  *   start: Offset from the start of the RAMBlock
1188  *   len: Length (in bytes) to send
1189  *   Return: 0 on success
1190  */
1191 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1192                          ram_addr_t start, ram_addr_t len)
1193 {
1194     RAMBlock *ramblock;
1195
1196     ms->postcopy_requests++;
1197     rcu_read_lock();
1198     if (!rbname) {
1199         /* Reuse last RAMBlock */
1200         ramblock = ms->last_req_rb;
1201
1202         if (!ramblock) {
1203             /*
1204              * Shouldn't happen, we can't reuse the last RAMBlock if
1205              * it's the 1st request.
1206              */
1207             error_report("ram_save_queue_pages no previous block");
1208             goto err;
1209         }
1210     } else {
1211         ramblock = qemu_ram_block_by_name(rbname);
1212
1213         if (!ramblock) {
1214             /* We shouldn't be asked for a non-existent RAMBlock */
1215             error_report("ram_save_queue_pages no block '%s'", rbname);
1216             goto err;
1217         }
1218         ms->last_req_rb = ramblock;
1219     }
1220     trace_ram_save_queue_pages(ramblock->idstr, start, len);
1221     if (start+len > ramblock->used_length) {
1222         error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1223                      RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1224                      __func__, start, len, ramblock->used_length);
1225         goto err;
1226     }
1227
1228     struct MigrationSrcPageRequest *new_entry =
1229         g_malloc0(sizeof(struct MigrationSrcPageRequest));
1230     new_entry->rb = ramblock;
1231     new_entry->offset = start;
1232     new_entry->len = len;
1233
1234     memory_region_ref(ramblock->mr);
1235     qemu_mutex_lock(&ms->src_page_req_mutex);
1236     QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1237     qemu_mutex_unlock(&ms->src_page_req_mutex);
1238     rcu_read_unlock();
1239
1240     return 0;
1241
1242 err:
1243     rcu_read_unlock();
1244     return -1;
1245 }
1246
1247 /**
1248  * ram_save_target_page: Save one target page
1249  *
1250  *
1251  * @f: QEMUFile where to send the data
1252  * @block: pointer to block that contains the page we want to send
1253  * @offset: offset inside the block for the page;
1254  * @last_stage: if we are at the completion stage
1255  * @bytes_transferred: increase it with the number of transferred bytes
1256  * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1257  *
1258  * Returns: Number of pages written.
1259  */
1260 static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
1261                                 PageSearchStatus *pss,
1262                                 bool last_stage,
1263                                 uint64_t *bytes_transferred,
1264                                 ram_addr_t dirty_ram_abs)
1265 {
1266     int res = 0;
1267
1268     /* Check the pages is dirty and if it is send it */
1269     if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1270         unsigned long *unsentmap;
1271         if (compression_switch && migrate_use_compression()) {
1272             res = ram_save_compressed_page(ms, f, pss,
1273                                            last_stage,
1274                                            bytes_transferred);
1275         } else {
1276             res = ram_save_page(ms, f, pss, last_stage,
1277                                 bytes_transferred);
1278         }
1279
1280         if (res < 0) {
1281             return res;
1282         }
1283         unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1284         if (unsentmap) {
1285             clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1286         }
1287         /* Only update last_sent_block if a block was actually sent; xbzrle
1288          * might have decided the page was identical so didn't bother writing
1289          * to the stream.
1290          */
1291         if (res > 0) {
1292             last_sent_block = pss->block;
1293         }
1294     }
1295
1296     return res;
1297 }
1298
1299 /**
1300  * ram_save_host_page: Starting at *offset send pages up to the end
1301  *                     of the current host page.  It's valid for the initial
1302  *                     offset to point into the middle of a host page
1303  *                     in which case the remainder of the hostpage is sent.
1304  *                     Only dirty target pages are sent.
1305  *
1306  * Returns: Number of pages written.
1307  *
1308  * @f: QEMUFile where to send the data
1309  * @block: pointer to block that contains the page we want to send
1310  * @offset: offset inside the block for the page; updated to last target page
1311  *          sent
1312  * @last_stage: if we are at the completion stage
1313  * @bytes_transferred: increase it with the number of transferred bytes
1314  * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1315  */
1316 static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
1317                               PageSearchStatus *pss,
1318                               bool last_stage,
1319                               uint64_t *bytes_transferred,
1320                               ram_addr_t dirty_ram_abs)
1321 {
1322     int tmppages, pages = 0;
1323     do {
1324         tmppages = ram_save_target_page(ms, f, pss, last_stage,
1325                                         bytes_transferred, dirty_ram_abs);
1326         if (tmppages < 0) {
1327             return tmppages;
1328         }
1329
1330         pages += tmppages;
1331         pss->offset += TARGET_PAGE_SIZE;
1332         dirty_ram_abs += TARGET_PAGE_SIZE;
1333     } while (pss->offset & (qemu_host_page_size - 1));
1334
1335     /* The offset we leave with is the last one we looked at */
1336     pss->offset -= TARGET_PAGE_SIZE;
1337     return pages;
1338 }
1339
1340 /**
1341  * ram_find_and_save_block: Finds a dirty page and sends it to f
1342  *
1343  * Called within an RCU critical section.
1344  *
1345  * Returns:  The number of pages written
1346  *           0 means no dirty pages
1347  *
1348  * @f: QEMUFile where to send the data
1349  * @last_stage: if we are at the completion stage
1350  * @bytes_transferred: increase it with the number of transferred bytes
1351  *
1352  * On systems where host-page-size > target-page-size it will send all the
1353  * pages in a host page that are dirty.
1354  */
1355
1356 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1357                                    uint64_t *bytes_transferred)
1358 {
1359     PageSearchStatus pss;
1360     MigrationState *ms = migrate_get_current();
1361     int pages = 0;
1362     bool again, found;
1363     ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1364                                  ram_addr_t space */
1365
1366     /* No dirty page as there is zero RAM */
1367     if (!ram_bytes_total()) {
1368         return pages;
1369     }
1370
1371     pss.block = last_seen_block;
1372     pss.offset = last_offset;
1373     pss.complete_round = false;
1374
1375     if (!pss.block) {
1376         pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1377     }
1378
1379     do {
1380         again = true;
1381         found = get_queued_page(ms, &pss, &dirty_ram_abs);
1382
1383         if (!found) {
1384             /* priority queue empty, so just search for something dirty */
1385             found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1386         }
1387
1388         if (found) {
1389             pages = ram_save_host_page(ms, f, &pss,
1390                                        last_stage, bytes_transferred,
1391                                        dirty_ram_abs);
1392         }
1393     } while (!pages && again);
1394
1395     last_seen_block = pss.block;
1396     last_offset = pss.offset;
1397
1398     return pages;
1399 }
1400
1401 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1402 {
1403     uint64_t pages = size / TARGET_PAGE_SIZE;
1404     if (zero) {
1405         acct_info.dup_pages += pages;
1406     } else {
1407         acct_info.norm_pages += pages;
1408         bytes_transferred += size;
1409         qemu_update_position(f, size);
1410     }
1411 }
1412
1413 static ram_addr_t ram_save_remaining(void)
1414 {
1415     return migration_dirty_pages;
1416 }
1417
1418 uint64_t ram_bytes_remaining(void)
1419 {
1420     return ram_save_remaining() * TARGET_PAGE_SIZE;
1421 }
1422
1423 uint64_t ram_bytes_transferred(void)
1424 {
1425     return bytes_transferred;
1426 }
1427
1428 uint64_t ram_bytes_total(void)
1429 {
1430     RAMBlock *block;
1431     uint64_t total = 0;
1432
1433     rcu_read_lock();
1434     QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1435         total += block->used_length;
1436     rcu_read_unlock();
1437     return total;
1438 }
1439
1440 void free_xbzrle_decoded_buf(void)
1441 {
1442     g_free(xbzrle_decoded_buf);
1443     xbzrle_decoded_buf = NULL;
1444 }
1445
1446 static void migration_bitmap_free(struct BitmapRcu *bmap)
1447 {
1448     g_free(bmap->bmap);
1449     g_free(bmap->unsentmap);
1450     g_free(bmap);
1451 }
1452
1453 static void ram_migration_cleanup(void *opaque)
1454 {
1455     /* caller have hold iothread lock or is in a bh, so there is
1456      * no writing race against this migration_bitmap
1457      */
1458     struct BitmapRcu *bitmap = migration_bitmap_rcu;
1459     atomic_rcu_set(&migration_bitmap_rcu, NULL);
1460     if (bitmap) {
1461         memory_global_dirty_log_stop();
1462         call_rcu(bitmap, migration_bitmap_free, rcu);
1463     }
1464
1465     XBZRLE_cache_lock();
1466     if (XBZRLE.cache) {
1467         cache_fini(XBZRLE.cache);
1468         g_free(XBZRLE.encoded_buf);
1469         g_free(XBZRLE.current_buf);
1470         g_free(ZERO_TARGET_PAGE);
1471         XBZRLE.cache = NULL;
1472         XBZRLE.encoded_buf = NULL;
1473         XBZRLE.current_buf = NULL;
1474     }
1475     XBZRLE_cache_unlock();
1476 }
1477
1478 static void reset_ram_globals(void)
1479 {
1480     last_seen_block = NULL;
1481     last_sent_block = NULL;
1482     last_offset = 0;
1483     last_version = ram_list.version;
1484     ram_bulk_stage = true;
1485 }
1486
1487 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1488
1489 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1490 {
1491     /* called in qemu main thread, so there is
1492      * no writing race against this migration_bitmap
1493      */
1494     if (migration_bitmap_rcu) {
1495         struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1496         bitmap = g_new(struct BitmapRcu, 1);
1497         bitmap->bmap = bitmap_new(new);
1498
1499         /* prevent migration_bitmap content from being set bit
1500          * by migration_bitmap_sync_range() at the same time.
1501          * it is safe to migration if migration_bitmap is cleared bit
1502          * at the same time.
1503          */
1504         qemu_mutex_lock(&migration_bitmap_mutex);
1505         bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1506         bitmap_set(bitmap->bmap, old, new - old);
1507
1508         /* We don't have a way to safely extend the sentmap
1509          * with RCU; so mark it as missing, entry to postcopy
1510          * will fail.
1511          */
1512         bitmap->unsentmap = NULL;
1513
1514         atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1515         qemu_mutex_unlock(&migration_bitmap_mutex);
1516         migration_dirty_pages += new - old;
1517         call_rcu(old_bitmap, migration_bitmap_free, rcu);
1518     }
1519 }
1520
1521 /*
1522  * 'expected' is the value you expect the bitmap mostly to be full
1523  * of; it won't bother printing lines that are all this value.
1524  * If 'todump' is null the migration bitmap is dumped.
1525  */
1526 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1527 {
1528     int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1529
1530     int64_t cur;
1531     int64_t linelen = 128;
1532     char linebuf[129];
1533
1534     if (!todump) {
1535         todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1536     }
1537
1538     for (cur = 0; cur < ram_pages; cur += linelen) {
1539         int64_t curb;
1540         bool found = false;
1541         /*
1542          * Last line; catch the case where the line length
1543          * is longer than remaining ram
1544          */
1545         if (cur + linelen > ram_pages) {
1546             linelen = ram_pages - cur;
1547         }
1548         for (curb = 0; curb < linelen; curb++) {
1549             bool thisbit = test_bit(cur + curb, todump);
1550             linebuf[curb] = thisbit ? '1' : '.';
1551             found = found || (thisbit != expected);
1552         }
1553         if (found) {
1554             linebuf[curb] = '\0';
1555             fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
1556         }
1557     }
1558 }
1559
1560 /* **** functions for postcopy ***** */
1561
1562 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1563 {
1564     struct RAMBlock *block;
1565     unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1566
1567     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1568         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1569         unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1570         unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1571
1572         while (run_start < range) {
1573             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1574             ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1575                               (run_end - run_start) << TARGET_PAGE_BITS);
1576             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1577         }
1578     }
1579 }
1580
1581 /*
1582  * Callback from postcopy_each_ram_send_discard for each RAMBlock
1583  * Note: At this point the 'unsentmap' is the processed bitmap combined
1584  *       with the dirtymap; so a '1' means it's either dirty or unsent.
1585  * start,length: Indexes into the bitmap for the first bit
1586  *            representing the named block and length in target-pages
1587  */
1588 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1589                                         PostcopyDiscardState *pds,
1590                                         unsigned long start,
1591                                         unsigned long length)
1592 {
1593     unsigned long end = start + length; /* one after the end */
1594     unsigned long current;
1595     unsigned long *unsentmap;
1596
1597     unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1598     for (current = start; current < end; ) {
1599         unsigned long one = find_next_bit(unsentmap, end, current);
1600
1601         if (one <= end) {
1602             unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1603             unsigned long discard_length;
1604
1605             if (zero >= end) {
1606                 discard_length = end - one;
1607             } else {
1608                 discard_length = zero - one;
1609             }
1610             if (discard_length) {
1611                 postcopy_discard_send_range(ms, pds, one, discard_length);
1612             }
1613             current = one + discard_length;
1614         } else {
1615             current = one;
1616         }
1617     }
1618
1619     return 0;
1620 }
1621
1622 /*
1623  * Utility for the outgoing postcopy code.
1624  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
1625  *   passing it bitmap indexes and name.
1626  * Returns: 0 on success
1627  * (qemu_ram_foreach_block ends up passing unscaled lengths
1628  *  which would mean postcopy code would have to deal with target page)
1629  */
1630 static int postcopy_each_ram_send_discard(MigrationState *ms)
1631 {
1632     struct RAMBlock *block;
1633     int ret;
1634
1635     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1636         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1637         PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1638                                                                first,
1639                                                                block->idstr);
1640
1641         /*
1642          * Postcopy sends chunks of bitmap over the wire, but it
1643          * just needs indexes at this point, avoids it having
1644          * target page specific code.
1645          */
1646         ret = postcopy_send_discard_bm_ram(ms, pds, first,
1647                                     block->used_length >> TARGET_PAGE_BITS);
1648         postcopy_discard_send_finish(ms, pds);
1649         if (ret) {
1650             return ret;
1651         }
1652     }
1653
1654     return 0;
1655 }
1656
/*
 * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
 *   the two bitmaps, that are similar, but one is inverted.
 *
 * We search for runs of target-pages that don't start or end on a
 * host page boundary;
 * unsent_pass=true: Cleans up partially unsent host pages by searching
 *                 the unsentmap
 * unsent_pass=false: Cleans up partially dirty host pages by searching
 *                 the main migration bitmap
 *
 * Each host page that needs fixing is marked entirely unsent and entirely
 * dirty (migration_dirty_pages is increased for every page that was not
 * dirty before) and, where required, a discard for the whole host page is
 * sent through @pds.
 *
 * @ms: migration state, passed on to postcopy_discard_send_range()
 * @unsent_pass: selects which bitmap is searched, see above
 * @block: RAMBlock to process; nothing is done when its page size equals
 *         the target page size
 * @pds: discard state, passed on to postcopy_discard_send_range()
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          RAMBlock *block,
                                          PostcopyDiscardState *pds)
{
    unsigned long *bitmap;
    unsigned long *unsentmap;
    /* Number of target pages that make up one page of this block */
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    /* Bitmap index of the first target page of the block */
    unsigned long first = block->offset >> TARGET_PAGE_BITS;
    unsigned long len = block->used_length >> TARGET_PAGE_BITS;
    /*
     * Bitmap index of the last target page of the block.
     * NOTE(review): wraps around if len is 0 - presumably a block always
     * has at least one target page; confirm.
     */
    unsigned long last = first + (len - 1);
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;

    if (unsent_pass) {
        /* Find a sent page */
        run_start = find_next_zero_bit(unsentmap, last + 1, first);
    } else {
        /* Find a dirty page */
        run_start = find_next_bit(bitmap, last + 1, first);
    }

    /* Loop until the search runs past the last page of the block */
    while (run_start <= last) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        host_offset = run_start % host_ratio;
        if (host_offset) {
            do_fixup = true;
            /* Round down to the first target page of the host page */
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* For the next pass */
            run_start = run_start + host_ratio;
        } else {
            /* Find the end of this run */
            unsigned long run_end;
            if (unsent_pass) {
                run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
            } else {
                run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
            }
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
            host_offset = run_end % host_ratio;
            if (host_offset) {
                do_fixup = true;
                fixup_start_addr = run_end - host_offset;
                /*
                 * This host page has gone, the next loop iteration starts
                 * from after the fixup
                 */
                run_start = fixup_start_addr + host_ratio;
            } else {
                /*
                 * No discards on this iteration, next loop starts from
                 * next sent/dirty page
                 */
                run_start = run_end + 1;
            }
        }

        /* fixup_start_addr is only read when do_fixup was set above */
        if (do_fixup) {
            unsigned long page;

            /* Tell the destination to discard this page */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /* For the unsent_pass we:
                 *     discard partially sent pages
                 * For the !unsent_pass (dirty) we:
                 *     discard partially dirty pages that were sent
                 *     (any partially sent pages were already discarded
                 *     by the previous unsent_pass)
                 */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,
                                            host_ratio);
            }

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* All pages in this host page are now not sent */
                set_bit(page, unsentmap);

                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        if (unsent_pass) {
            /* Find the next sent page for the next iteration */
            run_start = find_next_zero_bit(unsentmap, last + 1,
                                           run_start);
        } else {
            /* Find the next dirty page for the next iteration */
            run_start = find_next_bit(bitmap, last + 1, run_start);
        }
    }
}
1784
1785 /*
1786  * Utility for the outgoing postcopy code.
1787  *
1788  * Discard any partially sent host-page size chunks, mark any partially
1789  * dirty host-page size chunks as all dirty.  In this case the host-page
1790  * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1791  *
1792  * Returns: 0 on success
1793  */
1794 static int postcopy_chunk_hostpages(MigrationState *ms)
1795 {
1796     struct RAMBlock *block;
1797
1798     /* Easiest way to make sure we don't resume in the middle of a host-page */
1799     last_seen_block = NULL;
1800     last_sent_block = NULL;
1801     last_offset     = 0;
1802
1803     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1804         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1805
1806         PostcopyDiscardState *pds =
1807                          postcopy_discard_send_init(ms, first, block->idstr);
1808
1809         /* First pass: Discard all partially sent host pages */
1810         postcopy_chunk_hostpages_pass(ms, true, block, pds);
1811         /*
1812          * Second pass: Ensure that all partially dirty host pages are made
1813          * fully dirty.
1814          */
1815         postcopy_chunk_hostpages_pass(ms, false, block, pds);
1816
1817         postcopy_discard_send_finish(ms, pds);
1818     } /* ram_list loop */
1819
1820     return 0;
1821 }
1822
1823 /*
1824  * Transmit the set of pages to be discarded after precopy to the target
1825  * these are pages that:
1826  *     a) Have been previously transmitted but are now dirty again
1827  *     b) Pages that have never been transmitted, this ensures that
1828  *        any pages on the destination that have been mapped by background
1829  *        tasks get discarded (transparent huge pages is the specific concern)
1830  * Hopefully this is pretty sparse
1831  */
1832 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1833 {
1834     int ret;
1835     unsigned long *bitmap, *unsentmap;
1836
1837     rcu_read_lock();
1838
1839     /* This should be our last sync, the src is now paused */
1840     migration_bitmap_sync();
1841
1842     unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1843     if (!unsentmap) {
1844         /* We don't have a safe way to resize the sentmap, so
1845          * if the bitmap was resized it will be NULL at this
1846          * point.
1847          */
1848         error_report("migration ram resized during precopy phase");
1849         rcu_read_unlock();
1850         return -EINVAL;
1851     }
1852
1853     /* Deal with TPS != HPS and huge pages */
1854     ret = postcopy_chunk_hostpages(ms);
1855     if (ret) {
1856         rcu_read_unlock();
1857         return ret;
1858     }
1859
1860     /*
1861      * Update the unsentmap to be unsentmap = unsentmap | dirty
1862      */
1863     bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1864     bitmap_or(unsentmap, unsentmap, bitmap,
1865                last_ram_offset() >> TARGET_PAGE_BITS);
1866
1867
1868     trace_ram_postcopy_send_discard_bitmap();
1869 #ifdef DEBUG_POSTCOPY
1870     ram_debug_dump_bitmap(unsentmap, true);
1871 #endif
1872
1873     ret = postcopy_each_ram_send_discard(ms);
1874     rcu_read_unlock();
1875
1876     return ret;
1877 }
1878
1879 /*
1880  * At the start of the postcopy phase of migration, any now-dirty
1881  * precopied pages are discarded.
1882  *
1883  * start, length describe a byte address range within the RAMBlock
1884  *
1885  * Returns 0 on success.
1886  */
1887 int ram_discard_range(MigrationIncomingState *mis,
1888                       const char *block_name,
1889                       uint64_t start, size_t length)
1890 {
1891     int ret = -1;
1892
1893     trace_ram_discard_range(block_name, start, length);
1894
1895     rcu_read_lock();
1896     RAMBlock *rb = qemu_ram_block_by_name(block_name);
1897
1898     if (!rb) {
1899         error_report("ram_discard_range: Failed to find block '%s'",
1900                      block_name);
1901         goto err;
1902     }
1903
1904     ret = ram_block_discard_range(rb, start, length);
1905
1906 err:
1907     rcu_read_unlock();
1908
1909     return ret;
1910 }
1911
1912 static int ram_save_init_globals(void)
1913 {
1914     int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1915
1916     dirty_rate_high_cnt = 0;
1917     bitmap_sync_count = 0;
1918     migration_bitmap_sync_init();
1919     qemu_mutex_init(&migration_bitmap_mutex);
1920
1921     if (migrate_use_xbzrle()) {
1922         XBZRLE_cache_lock();
1923         ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1924         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1925                                   TARGET_PAGE_SIZE,
1926                                   TARGET_PAGE_SIZE);
1927         if (!XBZRLE.cache) {
1928             XBZRLE_cache_unlock();
1929             error_report("Error creating cache");
1930             return -1;
1931         }
1932         XBZRLE_cache_unlock();
1933
1934         /* We prefer not to abort if there is no memory */
1935         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1936         if (!XBZRLE.encoded_buf) {
1937             error_report("Error allocating encoded_buf");
1938             return -1;
1939         }
1940
1941         XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1942         if (!XBZRLE.current_buf) {
1943             error_report("Error allocating current_buf");
1944             g_free(XBZRLE.encoded_buf);
1945             XBZRLE.encoded_buf = NULL;
1946             return -1;
1947         }
1948
1949         acct_clear();
1950     }
1951
1952     /* For memory_global_dirty_log_start below.  */
1953     qemu_mutex_lock_iothread();
1954
1955     qemu_mutex_lock_ramlist();
1956     rcu_read_lock();
1957     bytes_transferred = 0;
1958     reset_ram_globals();
1959
1960     migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
1961     /* Skip setting bitmap if there is no RAM */
1962     if (ram_bytes_total()) {
1963         ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1964         migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1965         bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1966
1967         if (migrate_postcopy_ram()) {
1968             migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1969             bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1970         }
1971     }
1972
1973     /*
1974      * Count the total number of pages used by ram blocks not including any
1975      * gaps due to alignment or unplugs.
1976      */
1977     migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1978
1979     memory_global_dirty_log_start();
1980     migration_bitmap_sync();
1981     qemu_mutex_unlock_ramlist();
1982     qemu_mutex_unlock_iothread();
1983     rcu_read_unlock();
1984
1985     return 0;
1986 }
1987
1988 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1989  * long-running RCU critical section.  When rcu-reclaims in the code
1990  * start to become numerous it will be necessary to reduce the
1991  * granularity of these critical sections.
1992  */
1993
1994 static int ram_save_setup(QEMUFile *f, void *opaque)
1995 {
1996     RAMBlock *block;
1997
1998     /* migration has already setup the bitmap, reuse it. */
1999     if (!migration_in_colo_state()) {
2000         if (ram_save_init_globals() < 0) {
2001             return -1;
2002          }
2003     }
2004
2005     rcu_read_lock();
2006
2007     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2008
2009     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2010         qemu_put_byte(f, strlen(block->idstr));
2011         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2012         qemu_put_be64(f, block->used_length);
2013         if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2014             qemu_put_be64(f, block->page_size);
2015         }
2016     }
2017
2018     rcu_read_unlock();
2019
2020     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2021     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2022
2023     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2024
2025     return 0;
2026 }
2027
2028 static int ram_save_iterate(QEMUFile *f, void *opaque)
2029 {
2030     int ret;
2031     int i;
2032     int64_t t0;
2033     int done = 0;
2034
2035     rcu_read_lock();
2036     if (ram_list.version != last_version) {
2037         reset_ram_globals();
2038     }
2039
2040     /* Read version before ram_list.blocks */
2041     smp_rmb();
2042
2043     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2044
2045     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2046     i = 0;
2047     while ((ret = qemu_file_rate_limit(f)) == 0) {
2048         int pages;
2049
2050         pages = ram_find_and_save_block(f, false, &bytes_transferred);
2051         /* no more pages to sent */
2052         if (pages == 0) {
2053             done = 1;
2054             break;
2055         }
2056         acct_info.iterations++;
2057
2058         /* we want to check in the 1st loop, just in case it was the 1st time
2059            and we had to sync the dirty bitmap.
2060            qemu_get_clock_ns() is a bit expensive, so we only check each some
2061            iterations
2062         */
2063         if ((i & 63) == 0) {
2064             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2065             if (t1 > MAX_WAIT) {
2066                 trace_ram_save_iterate_big_wait(t1, i);
2067                 break;
2068             }
2069         }
2070         i++;
2071     }
2072     flush_compressed_data(f);
2073     rcu_read_unlock();
2074
2075     /*
2076      * Must occur before EOS (or any QEMUFile operation)
2077      * because of RDMA protocol.
2078      */
2079     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2080
2081     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2082     bytes_transferred += 8;
2083
2084     ret = qemu_file_get_error(f);
2085     if (ret < 0) {
2086         return ret;
2087     }
2088
2089     return done;
2090 }
2091
2092 /* Called with iothread lock */
2093 static int ram_save_complete(QEMUFile *f, void *opaque)
2094 {
2095     rcu_read_lock();
2096
2097     if (!migration_in_postcopy(migrate_get_current())) {
2098         migration_bitmap_sync();
2099     }
2100
2101     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2102
2103     /* try transferring iterative blocks of memory */
2104
2105     /* flush all remaining blocks regardless of rate limiting */
2106     while (true) {
2107         int pages;
2108
2109         pages = ram_find_and_save_block(f, !migration_in_colo_state(),
2110                                         &bytes_transferred);
2111         /* no more blocks to sent */
2112         if (pages == 0) {
2113             break;
2114         }
2115     }
2116
2117     flush_compressed_data(f);
2118     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2119
2120     rcu_read_unlock();
2121
2122     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2123
2124     return 0;
2125 }
2126
2127 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2128                              uint64_t *non_postcopiable_pending,
2129                              uint64_t *postcopiable_pending)
2130 {
2131     uint64_t remaining_size;
2132
2133     remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2134
2135     if (!migration_in_postcopy(migrate_get_current()) &&
2136         remaining_size < max_size) {
2137         qemu_mutex_lock_iothread();
2138         rcu_read_lock();
2139         migration_bitmap_sync();
2140         rcu_read_unlock();
2141         qemu_mutex_unlock_iothread();
2142         remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2143     }
2144
2145     /* We can do postcopy, and all the data is postcopiable */
2146     *postcopiable_pending += remaining_size;
2147 }
2148
2149 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2150 {
2151     unsigned int xh_len;
2152     int xh_flags;
2153     uint8_t *loaded_data;
2154
2155     if (!xbzrle_decoded_buf) {
2156         xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2157     }
2158     loaded_data = xbzrle_decoded_buf;
2159
2160     /* extract RLE header */
2161     xh_flags = qemu_get_byte(f);
2162     xh_len = qemu_get_be16(f);
2163
2164     if (xh_flags != ENCODING_FLAG_XBZRLE) {
2165         error_report("Failed to load XBZRLE page - wrong compression!");
2166         return -1;
2167     }
2168
2169     if (xh_len > TARGET_PAGE_SIZE) {
2170         error_report("Failed to load XBZRLE page - len overflow!");
2171         return -1;
2172     }
2173     /* load data and decode */
2174     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2175
2176     /* decode RLE */
2177     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2178                              TARGET_PAGE_SIZE) == -1) {
2179         error_report("Failed to load XBZRLE page - decode error!");
2180         return -1;
2181     }
2182
2183     return 0;
2184 }
2185
2186 /* Must be called from within a rcu critical section.
2187  * Returns a pointer from within the RCU-protected ram_list.
2188  */
2189 /*
2190  * Read a RAMBlock ID from the stream f.
2191  *
2192  * f: Stream to read from
2193  * flags: Page flags (mostly to see if it's a continuation of previous block)
2194  */
2195 static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2196                                               int flags)
2197 {
2198     static RAMBlock *block = NULL;
2199     char id[256];
2200     uint8_t len;
2201
2202     if (flags & RAM_SAVE_FLAG_CONTINUE) {
2203         if (!block) {
2204             error_report("Ack, bad migration stream!");
2205             return NULL;
2206         }
2207         return block;
2208     }
2209
2210     len = qemu_get_byte(f);
2211     qemu_get_buffer(f, (uint8_t *)id, len);
2212     id[len] = 0;
2213
2214     block = qemu_ram_block_by_name(id);
2215     if (!block) {
2216         error_report("Can't find block %s", id);
2217         return NULL;
2218     }
2219
2220     return block;
2221 }
2222
2223 static inline void *host_from_ram_block_offset(RAMBlock *block,
2224                                                ram_addr_t offset)
2225 {
2226     if (!offset_in_ramblock(block, offset)) {
2227         return NULL;
2228     }
2229
2230     return block->host + offset;
2231 }
2232
/*
 * Fill a page (or a whole RDMA chunk) with the single byte value ch.
 *
 * When ch is zero and the range already reads as all zeroes, the memory
 * is left untouched rather than being written again.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* Already zero: nothing to write */
        return;
    }

    memset(host, ch, size);
}
2243
2244 static void *do_data_decompress(void *opaque)
2245 {
2246     DecompressParam *param = opaque;
2247     unsigned long pagesize;
2248     uint8_t *des;
2249     int len;
2250
2251     qemu_mutex_lock(&param->mutex);
2252     while (!param->quit) {
2253         if (param->des) {
2254             des = param->des;
2255             len = param->len;
2256             param->des = 0;
2257             qemu_mutex_unlock(&param->mutex);
2258
2259             pagesize = TARGET_PAGE_SIZE;
2260             /* uncompress() will return failed in some case, especially
2261              * when the page is dirted when doing the compression, it's
2262              * not a problem because the dirty page will be retransferred
2263              * and uncompress() won't break the data in other pages.
2264              */
2265             uncompress((Bytef *)des, &pagesize,
2266                        (const Bytef *)param->compbuf, len);
2267
2268             qemu_mutex_lock(&decomp_done_lock);
2269             param->done = true;
2270             qemu_cond_signal(&decomp_done_cond);
2271             qemu_mutex_unlock(&decomp_done_lock);
2272
2273             qemu_mutex_lock(&param->mutex);
2274         } else {
2275             qemu_cond_wait(&param->cond, &param->mutex);
2276         }
2277     }
2278     qemu_mutex_unlock(&param->mutex);
2279
2280     return NULL;
2281 }
2282
2283 static void wait_for_decompress_done(void)
2284 {
2285     int idx, thread_count;
2286
2287     if (!migrate_use_compression()) {
2288         return;
2289     }
2290
2291     thread_count = migrate_decompress_threads();
2292     qemu_mutex_lock(&decomp_done_lock);
2293     for (idx = 0; idx < thread_count; idx++) {
2294         while (!decomp_param[idx].done) {
2295             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2296         }
2297     }
2298     qemu_mutex_unlock(&decomp_done_lock);
2299 }
2300
2301 void migrate_decompress_threads_create(void)
2302 {
2303     int i, thread_count;
2304
2305     thread_count = migrate_decompress_threads();
2306     decompress_threads = g_new0(QemuThread, thread_count);
2307     decomp_param = g_new0(DecompressParam, thread_count);
2308     qemu_mutex_init(&decomp_done_lock);
2309     qemu_cond_init(&decomp_done_cond);
2310     for (i = 0; i < thread_count; i++) {
2311         qemu_mutex_init(&decomp_param[i].mutex);
2312         qemu_cond_init(&decomp_param[i].cond);
2313         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2314         decomp_param[i].done = true;
2315         decomp_param[i].quit = false;
2316         qemu_thread_create(decompress_threads + i, "decompress",
2317                            do_data_decompress, decomp_param + i,
2318                            QEMU_THREAD_JOINABLE);
2319     }
2320 }
2321
2322 void migrate_decompress_threads_join(void)
2323 {
2324     int i, thread_count;
2325
2326     thread_count = migrate_decompress_threads();
2327     for (i = 0; i < thread_count; i++) {
2328         qemu_mutex_lock(&decomp_param[i].mutex);
2329         decomp_param[i].quit = true;
2330         qemu_cond_signal(&decomp_param[i].cond);
2331         qemu_mutex_unlock(&decomp_param[i].mutex);
2332     }
2333     for (i = 0; i < thread_count; i++) {
2334         qemu_thread_join(decompress_threads + i);
2335         qemu_mutex_destroy(&decomp_param[i].mutex);
2336         qemu_cond_destroy(&decomp_param[i].cond);
2337         g_free(decomp_param[i].compbuf);
2338     }
2339     g_free(decompress_threads);
2340     g_free(decomp_param);
2341     decompress_threads = NULL;
2342     decomp_param = NULL;
2343 }
2344
2345 static void decompress_data_with_multi_threads(QEMUFile *f,
2346                                                void *host, int len)
2347 {
2348     int idx, thread_count;
2349
2350     thread_count = migrate_decompress_threads();
2351     qemu_mutex_lock(&decomp_done_lock);
2352     while (true) {
2353         for (idx = 0; idx < thread_count; idx++) {
2354             if (decomp_param[idx].done) {
2355                 decomp_param[idx].done = false;
2356                 qemu_mutex_lock(&decomp_param[idx].mutex);
2357                 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2358                 decomp_param[idx].des = host;
2359                 decomp_param[idx].len = len;
2360                 qemu_cond_signal(&decomp_param[idx].cond);
2361                 qemu_mutex_unlock(&decomp_param[idx].mutex);
2362                 break;
2363             }
2364         }
2365         if (idx < thread_count) {
2366             break;
2367         } else {
2368             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2369         }
2370     }
2371     qemu_mutex_unlock(&decomp_done_lock);
2372 }
2373
2374 /*
2375  * Allocate data structures etc needed by incoming migration with postcopy-ram
2376  * postcopy-ram's similarly names postcopy_ram_incoming_init does the work
2377  */
2378 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2379 {
2380     size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2381
2382     return postcopy_ram_incoming_init(mis, ram_pages);
2383 }
2384
2385 /*
2386  * Called in postcopy mode by ram_load().
2387  * rcu_read_lock is taken prior to this being called.
2388  */
2389 static int ram_load_postcopy(QEMUFile *f)
2390 {
2391     int flags = 0, ret = 0;
2392     bool place_needed = false;
2393     bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2394     MigrationIncomingState *mis = migration_incoming_get_current();
2395     /* Temporary page that is later 'placed' */
2396     void *postcopy_host_page = postcopy_get_tmp_page(mis);
2397     void *last_host = NULL;
2398     bool all_zero = false;
2399
2400     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2401         ram_addr_t addr;
2402         void *host = NULL;
2403         void *page_buffer = NULL;
2404         void *place_source = NULL;
2405         RAMBlock *block = NULL;
2406         uint8_t ch;
2407
2408         addr = qemu_get_be64(f);
2409         flags = addr & ~TARGET_PAGE_MASK;
2410         addr &= TARGET_PAGE_MASK;
2411
2412         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2413         place_needed = false;
2414         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2415             block = ram_block_from_stream(f, flags);
2416
2417             host = host_from_ram_block_offset(block, addr);
2418             if (!host) {
2419                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2420                 ret = -EINVAL;
2421                 break;
2422             }
2423             /*
2424              * Postcopy requires that we place whole host pages atomically.
2425              * To make it atomic, the data is read into a temporary page
2426              * that's moved into place later.
2427              * The migration protocol uses,  possibly smaller, target-pages
2428              * however the source ensures it always sends all the components
2429              * of a host page in order.
2430              */
2431             page_buffer = postcopy_host_page +
2432                           ((uintptr_t)host & ~qemu_host_page_mask);
2433             /* If all TP are zero then we can optimise the place */
2434             if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2435                 all_zero = true;
2436             } else {
2437                 /* not the 1st TP within the HP */
2438                 if (host != (last_host + TARGET_PAGE_SIZE)) {
2439                     error_report("Non-sequential target page %p/%p",
2440                                   host, last_host);
2441                     ret = -EINVAL;
2442                     break;
2443                 }
2444             }
2445
2446
2447             /*
2448              * If it's the last part of a host page then we place the host
2449              * page
2450              */
2451             place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2452                                      ~qemu_host_page_mask) == 0;
2453             place_source = postcopy_host_page;
2454         }
2455         last_host = host;
2456
2457         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2458         case RAM_SAVE_FLAG_COMPRESS:
2459             ch = qemu_get_byte(f);
2460             memset(page_buffer, ch, TARGET_PAGE_SIZE);
2461             if (ch) {
2462                 all_zero = false;
2463             }
2464             break;
2465
2466         case RAM_SAVE_FLAG_PAGE:
2467             all_zero = false;
2468             if (!place_needed || !matching_page_sizes) {
2469                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2470             } else {
2471                 /* Avoids the qemu_file copy during postcopy, which is
2472                  * going to do a copy later; can only do it when we
2473                  * do this read in one go (matching page sizes)
2474                  */
2475                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2476                                          TARGET_PAGE_SIZE);
2477             }
2478             break;
2479         case RAM_SAVE_FLAG_EOS:
2480             /* normal exit */
2481             break;
2482         default:
2483             error_report("Unknown combination of migration flags: %#x"
2484                          " (postcopy mode)", flags);
2485             ret = -EINVAL;
2486         }
2487
2488         if (place_needed) {
2489             /* This gets called at the last target page in the host page */
2490             void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2491
2492             if (all_zero) {
2493                 ret = postcopy_place_page_zero(mis, place_dest,
2494                                                block->page_size);
2495             } else {
2496                 ret = postcopy_place_page(mis, place_dest,
2497                                           place_source, block->page_size);
2498             }
2499         }
2500         if (!ret) {
2501             ret = qemu_file_get_error(f);
2502         }
2503     }
2504
2505     return ret;
2506 }
2507
2508 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2509 {
2510     int flags = 0, ret = 0;
2511     static uint64_t seq_iter;
2512     int len = 0;
2513     /*
2514      * If system is running in postcopy mode, page inserts to host memory must
2515      * be atomic
2516      */
2517     bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2518     /* ADVISE is earlier, it shows the source has the postcopy capability on */
2519     bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2520
2521     seq_iter++;
2522
2523     if (version_id != 4) {
2524         ret = -EINVAL;
2525     }
2526
2527     /* This RCU critical section can be very long running.
2528      * When RCU reclaims in the code start to become numerous,
2529      * it will be necessary to reduce the granularity of this
2530      * critical section.
2531      */
2532     rcu_read_lock();
2533
2534     if (postcopy_running) {
2535         ret = ram_load_postcopy(f);
2536     }
2537
2538     while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2539         ram_addr_t addr, total_ram_bytes;
2540         void *host = NULL;
2541         uint8_t ch;
2542
2543         addr = qemu_get_be64(f);
2544         flags = addr & ~TARGET_PAGE_MASK;
2545         addr &= TARGET_PAGE_MASK;
2546
2547         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2548                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2549             RAMBlock *block = ram_block_from_stream(f, flags);
2550
2551             host = host_from_ram_block_offset(block, addr);
2552             if (!host) {
2553                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2554                 ret = -EINVAL;
2555                 break;
2556             }
2557         }
2558
2559         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2560         case RAM_SAVE_FLAG_MEM_SIZE:
2561             /* Synchronize RAM block list */
2562             total_ram_bytes = addr;
2563             while (!ret && total_ram_bytes) {
2564                 RAMBlock *block;
2565                 char id[256];
2566                 ram_addr_t length;
2567
2568                 len = qemu_get_byte(f);
2569                 qemu_get_buffer(f, (uint8_t *)id, len);
2570                 id[len] = 0;
2571                 length = qemu_get_be64(f);
2572
2573                 block = qemu_ram_block_by_name(id);
2574                 if (block) {
2575                     if (length != block->used_length) {
2576                         Error *local_err = NULL;
2577
2578                         ret = qemu_ram_resize(block, length,
2579                                               &local_err);
2580                         if (local_err) {
2581                             error_report_err(local_err);
2582                         }
2583                     }
2584                     /* For postcopy we need to check hugepage sizes match */
2585                     if (postcopy_advised &&
2586                         block->page_size != qemu_host_page_size) {
2587                         uint64_t remote_page_size = qemu_get_be64(f);
2588                         if (remote_page_size != block->page_size) {
2589                             error_report("Mismatched RAM page size %s "
2590                                          "(local) %zd != %" PRId64,
2591                                          id, block->page_size,
2592                                          remote_page_size);
2593                             ret = -EINVAL;
2594                         }
2595                     }
2596                     ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2597                                           block->idstr);
2598                 } else {
2599                     error_report("Unknown ramblock \"%s\", cannot "
2600                                  "accept migration", id);
2601                     ret = -EINVAL;
2602                 }
2603
2604                 total_ram_bytes -= length;
2605             }
2606             break;
2607
2608         case RAM_SAVE_FLAG_COMPRESS:
2609             ch = qemu_get_byte(f);
2610             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2611             break;
2612
2613         case RAM_SAVE_FLAG_PAGE:
2614             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2615             break;
2616
2617         case RAM_SAVE_FLAG_COMPRESS_PAGE:
2618             len = qemu_get_be32(f);
2619             if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2620                 error_report("Invalid compressed data length: %d", len);
2621                 ret = -EINVAL;
2622                 break;
2623             }
2624             decompress_data_with_multi_threads(f, host, len);
2625             break;
2626
2627         case RAM_SAVE_FLAG_XBZRLE:
2628             if (load_xbzrle(f, addr, host) < 0) {
2629                 error_report("Failed to decompress XBZRLE page at "
2630                              RAM_ADDR_FMT, addr);
2631                 ret = -EINVAL;
2632                 break;
2633             }
2634             break;
2635         case RAM_SAVE_FLAG_EOS:
2636             /* normal exit */
2637             break;
2638         default:
2639             if (flags & RAM_SAVE_FLAG_HOOK) {
2640                 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2641             } else {
2642                 error_report("Unknown combination of migration flags: %#x",
2643                              flags);
2644                 ret = -EINVAL;
2645             }
2646         }
2647         if (!ret) {
2648             ret = qemu_file_get_error(f);
2649         }
2650     }
2651
2652     wait_for_decompress_done();
2653     rcu_read_unlock();
2654     trace_ram_load_complete(ret, seq_iter);
2655     return ret;
2656 }
2657
2658 static SaveVMHandlers savevm_ram_handlers = {
2659     .save_live_setup = ram_save_setup,
2660     .save_live_iterate = ram_save_iterate,
2661     .save_live_complete_postcopy = ram_save_complete,
2662     .save_live_complete_precopy = ram_save_complete,
2663     .save_live_pending = ram_save_pending,
2664     .load_state = ram_load,
2665     .cleanup = ram_migration_cleanup,
2666 };
2667
2668 void ram_mig_init(void)
2669 {
2670     qemu_mutex_init(&XBZRLE.lock);
2671     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
2672 }