Migration: Emit event at start of pass
[qemu/cris-port.git] / migration / ram.c
blob 102d1f2b14f101ea03e68ed3c2e9ca71111678fe
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
28 #include <stdint.h>
29 #include <zlib.h>
30 #include "qapi-event.h"
31 #include "qemu/bitops.h"
32 #include "qemu/bitmap.h"
33 #include "qemu/timer.h"
34 #include "qemu/main-loop.h"
35 #include "migration/migration.h"
36 #include "migration/postcopy-ram.h"
37 #include "exec/address-spaces.h"
38 #include "migration/page_cache.h"
39 #include "qemu/error-report.h"
40 #include "trace.h"
41 #include "exec/ram_addr.h"
42 #include "qemu/rcu_queue.h"
44 #ifdef DEBUG_MIGRATION_RAM
45 #define DPRINTF(fmt, ...) \
46 do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
47 #else
48 #define DPRINTF(fmt, ...) \
49 do { } while (0)
50 #endif
52 static int dirty_rate_high_cnt;
54 static uint64_t bitmap_sync_count;
56 /***********************************************************/
57 /* ram save/restore */
59 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
60 #define RAM_SAVE_FLAG_COMPRESS 0x02
61 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
62 #define RAM_SAVE_FLAG_PAGE 0x08
63 #define RAM_SAVE_FLAG_EOS 0x10
64 #define RAM_SAVE_FLAG_CONTINUE 0x20
65 #define RAM_SAVE_FLAG_XBZRLE 0x40
66 /* 0x80 is reserved in migration.h start with 0x100 next */
67 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
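/* These flags are OR'ed into the low bits of the 64-bit page offset that
 * save_page_header() writes to the stream, so a normal page is announced
 * roughly as:
 *
 *     qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE);
 *
 * with RAM_SAVE_FLAG_CONTINUE added when the page belongs to the same
 * RAMBlock as the previously sent page, in which case the block idstr is
 * omitted.
 */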
69 static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
71 static inline bool is_zero_range(uint8_t *p, uint64_t size)
73 return buffer_find_nonzero_offset(p, size) == size;
76 /* struct contains XBZRLE cache and a static page
77 used by the compression */
78 static struct {
79 /* buffer used for XBZRLE encoding */
80 uint8_t *encoded_buf;
81 /* buffer for storing page content */
82 uint8_t *current_buf;
83 /* Cache for XBZRLE, Protected by lock. */
84 PageCache *cache;
85 QemuMutex lock;
86 } XBZRLE;
88 /* buffer used for XBZRLE decoding */
89 static uint8_t *xbzrle_decoded_buf;
91 static void XBZRLE_cache_lock(void)
93 if (migrate_use_xbzrle())
94 qemu_mutex_lock(&XBZRLE.lock);
97 static void XBZRLE_cache_unlock(void)
99 if (migrate_use_xbzrle())
100 qemu_mutex_unlock(&XBZRLE.lock);
104 * called from qmp_migrate_set_cache_size in main thread, possibly while
105 * a migration is in progress.
106 * A running migration may be using the cache and might finish during this
107 * call, hence changes to the cache are protected by XBZRLE.lock().
109 int64_t xbzrle_cache_resize(int64_t new_size)
111 PageCache *new_cache;
112 int64_t ret;
114 if (new_size < TARGET_PAGE_SIZE) {
115 return -1;
118 XBZRLE_cache_lock();
120 if (XBZRLE.cache != NULL) {
121 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
122 goto out_new_size;
124 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
125 TARGET_PAGE_SIZE);
126 if (!new_cache) {
127 error_report("Error creating cache");
128 ret = -1;
129 goto out;
132 cache_fini(XBZRLE.cache);
133 XBZRLE.cache = new_cache;
136 out_new_size:
137 ret = pow2floor(new_size);
138 out:
139 XBZRLE_cache_unlock();
140 return ret;
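/* Usage sketch: qmp_migrate_set_cache_size() ends up calling e.g.
 * xbzrle_cache_resize(64 * 1024 * 1024); unless pow2floor() of the request
 * matches the current size, the cache is rebuilt with
 * new_size / TARGET_PAGE_SIZE entries, and the size actually in effect,
 * pow2floor(new_size), is returned (-1 on error).
 */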
143 /* accounting for migration statistics */
144 typedef struct AccountingInfo {
145 uint64_t dup_pages;
146 uint64_t skipped_pages;
147 uint64_t norm_pages;
148 uint64_t iterations;
149 uint64_t xbzrle_bytes;
150 uint64_t xbzrle_pages;
151 uint64_t xbzrle_cache_miss;
152 double xbzrle_cache_miss_rate;
153 uint64_t xbzrle_overflows;
154 } AccountingInfo;
156 static AccountingInfo acct_info;
158 static void acct_clear(void)
160 memset(&acct_info, 0, sizeof(acct_info));
163 uint64_t dup_mig_bytes_transferred(void)
165 return acct_info.dup_pages * TARGET_PAGE_SIZE;
168 uint64_t dup_mig_pages_transferred(void)
170 return acct_info.dup_pages;
173 uint64_t skipped_mig_bytes_transferred(void)
175 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
178 uint64_t skipped_mig_pages_transferred(void)
180 return acct_info.skipped_pages;
183 uint64_t norm_mig_bytes_transferred(void)
185 return acct_info.norm_pages * TARGET_PAGE_SIZE;
188 uint64_t norm_mig_pages_transferred(void)
190 return acct_info.norm_pages;
193 uint64_t xbzrle_mig_bytes_transferred(void)
195 return acct_info.xbzrle_bytes;
198 uint64_t xbzrle_mig_pages_transferred(void)
200 return acct_info.xbzrle_pages;
203 uint64_t xbzrle_mig_pages_cache_miss(void)
205 return acct_info.xbzrle_cache_miss;
208 double xbzrle_mig_cache_miss_rate(void)
210 return acct_info.xbzrle_cache_miss_rate;
213 uint64_t xbzrle_mig_pages_overflow(void)
215 return acct_info.xbzrle_overflows;
218 /* This is the last block that we have visited searching for dirty pages
220 static RAMBlock *last_seen_block;
221 /* This is the last block from where we have sent data */
222 static RAMBlock *last_sent_block;
223 static ram_addr_t last_offset;
224 static QemuMutex migration_bitmap_mutex;
225 static uint64_t migration_dirty_pages;
226 static uint32_t last_version;
227 static bool ram_bulk_stage;
229 /* used by the search for pages to send */
230 struct PageSearchStatus {
231 /* Current block being searched */
232 RAMBlock *block;
233 /* Current offset to search from */
234 ram_addr_t offset;
235 /* Set once we wrap around */
236 bool complete_round;
238 typedef struct PageSearchStatus PageSearchStatus;
240 static struct BitmapRcu {
241 struct rcu_head rcu;
242 /* Main migration bitmap */
243 unsigned long *bmap;
244 /* bitmap of pages that haven't been sent even once
245 * only maintained and used in postcopy at the moment
246 * where it's used to send the dirtymap at the start
247 * of the postcopy phase
249 unsigned long *unsentmap;
250 } *migration_bitmap_rcu;
252 struct CompressParam {
253 bool start;
254 bool done;
255 QEMUFile *file;
256 QemuMutex mutex;
257 QemuCond cond;
258 RAMBlock *block;
259 ram_addr_t offset;
261 typedef struct CompressParam CompressParam;
263 struct DecompressParam {
264 bool start;
265 QemuMutex mutex;
266 QemuCond cond;
267 void *des;
268 uint8_t *compbuf;
269 int len;
271 typedef struct DecompressParam DecompressParam;
273 static CompressParam *comp_param;
274 static QemuThread *compress_threads;
275 /* comp_done_cond is used to wake up the migration thread when
276 * one of the compression threads has finished the compression.
277 * comp_done_lock is used to co-work with comp_done_cond.
279 static QemuMutex *comp_done_lock;
280 static QemuCond *comp_done_cond;
281 /* The empty QEMUFileOps will be used by file in CompressParam */
282 static const QEMUFileOps empty_ops = { };
284 static bool compression_switch;
285 static bool quit_comp_thread;
286 static bool quit_decomp_thread;
287 static DecompressParam *decomp_param;
288 static QemuThread *decompress_threads;
289 static uint8_t *compressed_data_buf;
291 static int do_compress_ram_page(CompressParam *param);
293 static void *do_data_compress(void *opaque)
295 CompressParam *param = opaque;
297 while (!quit_comp_thread) {
298 qemu_mutex_lock(&param->mutex);
299 /* Re-check quit_comp_thread in case
300 * terminate_compression_threads() was called just before
301 * qemu_mutex_lock(&param->mutex) and after
302 * while (!quit_comp_thread); re-checking it here makes
303 * sure the compression thread terminates as expected.
305 while (!param->start && !quit_comp_thread) {
306 qemu_cond_wait(&param->cond, &param->mutex);
308 if (!quit_comp_thread) {
309 do_compress_ram_page(param);
311 param->start = false;
312 qemu_mutex_unlock(&param->mutex);
314 qemu_mutex_lock(comp_done_lock);
315 param->done = true;
316 qemu_cond_signal(comp_done_cond);
317 qemu_mutex_unlock(comp_done_lock);
320 return NULL;
323 static inline void terminate_compression_threads(void)
325 int idx, thread_count;
327 thread_count = migrate_compress_threads();
328 quit_comp_thread = true;
329 for (idx = 0; idx < thread_count; idx++) {
330 qemu_mutex_lock(&comp_param[idx].mutex);
331 qemu_cond_signal(&comp_param[idx].cond);
332 qemu_mutex_unlock(&comp_param[idx].mutex);
336 void migrate_compress_threads_join(void)
338 int i, thread_count;
340 if (!migrate_use_compression()) {
341 return;
343 terminate_compression_threads();
344 thread_count = migrate_compress_threads();
345 for (i = 0; i < thread_count; i++) {
346 qemu_thread_join(compress_threads + i);
347 qemu_fclose(comp_param[i].file);
348 qemu_mutex_destroy(&comp_param[i].mutex);
349 qemu_cond_destroy(&comp_param[i].cond);
351 qemu_mutex_destroy(comp_done_lock);
352 qemu_cond_destroy(comp_done_cond);
353 g_free(compress_threads);
354 g_free(comp_param);
355 g_free(comp_done_cond);
356 g_free(comp_done_lock);
357 compress_threads = NULL;
358 comp_param = NULL;
359 comp_done_cond = NULL;
360 comp_done_lock = NULL;
363 void migrate_compress_threads_create(void)
365 int i, thread_count;
367 if (!migrate_use_compression()) {
368 return;
370 quit_comp_thread = false;
371 compression_switch = true;
372 thread_count = migrate_compress_threads();
373 compress_threads = g_new0(QemuThread, thread_count);
374 comp_param = g_new0(CompressParam, thread_count);
375 comp_done_cond = g_new0(QemuCond, 1);
376 comp_done_lock = g_new0(QemuMutex, 1);
377 qemu_cond_init(comp_done_cond);
378 qemu_mutex_init(comp_done_lock);
379 for (i = 0; i < thread_count; i++) {
380 /* comp_param[i].file is just used as a dummy buffer to save data,
381 * set its ops to empty.
383 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
384 comp_param[i].done = true;
385 qemu_mutex_init(&comp_param[i].mutex);
386 qemu_cond_init(&comp_param[i].cond);
387 qemu_thread_create(compress_threads + i, "compress",
388 do_data_compress, comp_param + i,
389 QEMU_THREAD_JOINABLE);
394 * save_page_header: Write page header to wire
396 * If this is the 1st block, it also writes the block identification
398 * Returns: Number of bytes written
400 * @f: QEMUFile where to send the data
401 * @block: block that contains the page we want to send
402 * @offset: offset inside the block for the page
403 * in the lower bits, it contains flags
405 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
407 size_t size, len;
409 qemu_put_be64(f, offset);
410 size = 8;
412 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
413 len = strlen(block->idstr);
414 qemu_put_byte(f, len);
415 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
416 size += 1 + len;
418 return size;
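/* Resulting wire layout (sketch): a be64 word carrying the page offset plus
 * the RAM_SAVE_FLAG_* bits, then, only when RAM_SAVE_FLAG_CONTINUE is clear,
 * one byte with strlen(block->idstr) followed by the idstr itself; the
 * returned size is therefore 8, or 9 + strlen(block->idstr).
 */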
421 /* Reduce amount of guest cpu execution to hopefully slow down memory writes.
422 * If guest dirty memory rate is reduced below the rate at which we can
423 * transfer pages to the destination then we should be able to complete
424 * migration. Some workloads dirty memory way too fast and will not effectively
425 * converge, even with auto-converge.
427 static void mig_throttle_guest_down(void)
429 MigrationState *s = migrate_get_current();
430 uint64_t pct_initial =
431 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
432 uint64_t pct_increment =
433 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
435 /* We have not started throttling yet. Let's start it. */
436 if (!cpu_throttle_active()) {
437 cpu_throttle_set(pct_initial);
438 } else {
439 /* Throttling already on, just increase the rate */
440 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
444 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
445 * The important thing is that a stale (not-yet-0'd) page be replaced
446 * by the new data.
447 * As a bonus, if the page wasn't in the cache it gets added so that
448 * when a small write is made into the 0'd page it gets XBZRLE sent
450 static void xbzrle_cache_zero_page(ram_addr_t current_addr)
452 if (ram_bulk_stage || !migrate_use_xbzrle()) {
453 return;
456 /* We don't care if this fails to allocate a new cache page
457 * as long as it updated an old one */
458 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
459 bitmap_sync_count);
462 #define ENCODING_FLAG_XBZRLE 0x1
465 * save_xbzrle_page: compress and send current page
467 * Returns: 1 means that we wrote the page
468 * 0 means that page is identical to the one already sent
469 * -1 means that xbzrle would be longer than normal
471 * @f: QEMUFile where to send the data
472 * @current_data:
473 * @current_addr:
474 * @block: block that contains the page we want to send
475 * @offset: offset inside the block for the page
476 * @last_stage: if we are at the completion stage
477 * @bytes_transferred: increase it with the number of transferred bytes
479 static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
480 ram_addr_t current_addr, RAMBlock *block,
481 ram_addr_t offset, bool last_stage,
482 uint64_t *bytes_transferred)
484 int encoded_len = 0, bytes_xbzrle;
485 uint8_t *prev_cached_page;
487 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
488 acct_info.xbzrle_cache_miss++;
489 if (!last_stage) {
490 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
491 bitmap_sync_count) == -1) {
492 return -1;
493 } else {
494 /* update *current_data when the page has been
495 inserted into cache */
496 *current_data = get_cached_data(XBZRLE.cache, current_addr);
499 return -1;
502 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
504 /* save current buffer into memory */
505 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
507 /* XBZRLE encoding (if there is no overflow) */
508 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
509 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
510 TARGET_PAGE_SIZE);
511 if (encoded_len == 0) {
512 DPRINTF("Skipping unmodified page\n");
513 return 0;
514 } else if (encoded_len == -1) {
515 DPRINTF("Overflow\n");
516 acct_info.xbzrle_overflows++;
517 /* update data in the cache */
518 if (!last_stage) {
519 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
520 *current_data = prev_cached_page;
522 return -1;
525 /* we need to update the data in the cache, in order to get the same data */
526 if (!last_stage) {
527 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
530 /* Send XBZRLE based compressed page */
531 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
532 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
533 qemu_put_be16(f, encoded_len);
534 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
535 bytes_xbzrle += encoded_len + 1 + 2;
536 acct_info.xbzrle_pages++;
537 acct_info.xbzrle_bytes += bytes_xbzrle;
538 *bytes_transferred += bytes_xbzrle;
540 return 1;
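/* On the wire an XBZRLE page therefore looks like: the usual page header
 * with RAM_SAVE_FLAG_XBZRLE set, one ENCODING_FLAG_XBZRLE byte, a be16
 * encoded length, and then encoded_len bytes of delta data; load_xbzrle()
 * on the destination parses exactly this layout.
 */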
543 /* Called with rcu_read_lock() to protect migration_bitmap
544 * rb: The RAMBlock to search for dirty pages in
545 * start: Start address (typically so we can continue from previous page)
546 * ram_addr_abs: Pointer into which to store the address of the dirty page
547 * within the global ram_addr space
549 * Returns: byte offset within memory region of the start of a dirty page
551 static inline
552 ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
553 ram_addr_t start,
554 ram_addr_t *ram_addr_abs)
556 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
557 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
558 uint64_t rb_size = rb->used_length;
559 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
560 unsigned long *bitmap;
562 unsigned long next;
564 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
565 if (ram_bulk_stage && nr > base) {
566 next = nr + 1;
567 } else {
568 next = find_next_bit(bitmap, size, nr);
571 *ram_addr_abs = next << TARGET_PAGE_BITS;
572 return (next - base) << TARGET_PAGE_BITS;
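/* Worked example, assuming 4K target pages: for a RAMBlock at offset
 * 0x100000 (base page 0x100), if the first dirty bit found is global page
 * 0x110 then *ram_addr_abs is set to 0x110000 and the function returns
 * 0x10000, i.e. the byte offset of that page within the block.
 */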
575 static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
577 bool ret;
578 int nr = addr >> TARGET_PAGE_BITS;
579 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
581 ret = test_and_clear_bit(nr, bitmap);
583 if (ret) {
584 migration_dirty_pages--;
586 return ret;
589 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
591 unsigned long *bitmap;
592 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
593 migration_dirty_pages +=
594 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
597 /* Fix me: there are too many global variables used in migration process. */
598 static int64_t start_time;
599 static int64_t bytes_xfer_prev;
600 static int64_t num_dirty_pages_period;
601 static uint64_t xbzrle_cache_miss_prev;
602 static uint64_t iterations_prev;
604 static void migration_bitmap_sync_init(void)
606 start_time = 0;
607 bytes_xfer_prev = 0;
608 num_dirty_pages_period = 0;
609 xbzrle_cache_miss_prev = 0;
610 iterations_prev = 0;
613 /* Called with iothread lock held, to protect ram_list.dirty_memory[] */
614 static void migration_bitmap_sync(void)
616 RAMBlock *block;
617 uint64_t num_dirty_pages_init = migration_dirty_pages;
618 MigrationState *s = migrate_get_current();
619 int64_t end_time;
620 int64_t bytes_xfer_now;
622 bitmap_sync_count++;
624 if (!bytes_xfer_prev) {
625 bytes_xfer_prev = ram_bytes_transferred();
628 if (!start_time) {
629 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
632 trace_migration_bitmap_sync_start();
633 address_space_sync_dirty_bitmap(&address_space_memory);
635 qemu_mutex_lock(&migration_bitmap_mutex);
636 rcu_read_lock();
637 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
638 migration_bitmap_sync_range(block->offset, block->used_length);
640 rcu_read_unlock();
641 qemu_mutex_unlock(&migration_bitmap_mutex);
643 trace_migration_bitmap_sync_end(migration_dirty_pages
644 - num_dirty_pages_init);
645 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
646 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
648 /* more than 1 second = 1000 milliseconds */
649 if (end_time > start_time + 1000) {
650 if (migrate_auto_converge()) {
651 /* The following detection logic can be refined later. For now:
652 Check to see if the dirtied bytes are 50% more than the approx.
653 amount of bytes that just got transferred since the last time we
654 were in this routine. If that happens twice, start or increase
655 throttling */
656 bytes_xfer_now = ram_bytes_transferred();
658 if (s->dirty_pages_rate &&
659 (num_dirty_pages_period * TARGET_PAGE_SIZE >
660 (bytes_xfer_now - bytes_xfer_prev)/2) &&
661 (dirty_rate_high_cnt++ >= 2)) {
662 trace_migration_throttle();
663 dirty_rate_high_cnt = 0;
664 mig_throttle_guest_down();
666 bytes_xfer_prev = bytes_xfer_now;
669 if (migrate_use_xbzrle()) {
670 if (iterations_prev != acct_info.iterations) {
671 acct_info.xbzrle_cache_miss_rate =
672 (double)(acct_info.xbzrle_cache_miss -
673 xbzrle_cache_miss_prev) /
674 (acct_info.iterations - iterations_prev);
676 iterations_prev = acct_info.iterations;
677 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
679 s->dirty_pages_rate = num_dirty_pages_period * 1000
680 / (end_time - start_time);
681 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
682 start_time = end_time;
683 num_dirty_pages_period = 0;
685 s->dirty_sync_count = bitmap_sync_count;
686 if (migrate_use_events()) {
687 qapi_event_send_migration_pass(bitmap_sync_count, NULL);
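/* The event above is what the commit subject refers to: when events are
 * enabled, each new pass over RAM (i.e. each bitmap sync) emits a QAPI
 * event carrying bitmap_sync_count, roughly
 *     { "event": "MIGRATION_PASS", "data": { "pass": 2 } }
 * so management tools can watch how many passes the migration has made.
 */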
692 * save_zero_page: Send the zero page to the stream
694 * Returns: Number of pages written.
696 * @f: QEMUFile where to send the data
697 * @block: block that contains the page we want to send
698 * @offset: offset inside the block for the page
699 * @p: pointer to the page
700 * @bytes_transferred: increase it with the number of transferred bytes
702 static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
703 uint8_t *p, uint64_t *bytes_transferred)
705 int pages = -1;
707 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
708 acct_info.dup_pages++;
709 *bytes_transferred += save_page_header(f, block,
710 offset | RAM_SAVE_FLAG_COMPRESS);
711 qemu_put_byte(f, 0);
712 *bytes_transferred += 1;
713 pages = 1;
716 return pages;
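/* A zero page is thus transmitted as just the page header with
 * RAM_SAVE_FLAG_COMPRESS set plus a single fill byte of 0 (a few bytes
 * instead of TARGET_PAGE_SIZE), which is why dup_pages are accounted
 * separately from norm_pages.
 */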
720 * ram_save_page: Send the given page to the stream
722 * Returns: Number of pages written.
723 * < 0 - error
724 * >=0 - Number of pages written - this might legally be 0
725 * if xbzrle noticed the page was the same.
727 * @f: QEMUFile where to send the data
728 * @block: block that contains the page we want to send
729 * @offset: offset inside the block for the page
730 * @last_stage: if we are at the completion stage
731 * @bytes_transferred: increase it with the number of transferred bytes
733 static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
734 bool last_stage, uint64_t *bytes_transferred)
736 int pages = -1;
737 uint64_t bytes_xmit;
738 ram_addr_t current_addr;
739 uint8_t *p;
740 int ret;
741 bool send_async = true;
743 p = block->host + offset;
745 /* When in doubt, send the page as normal */
746 bytes_xmit = 0;
747 ret = ram_control_save_page(f, block->offset,
748 offset, TARGET_PAGE_SIZE, &bytes_xmit);
749 if (bytes_xmit) {
750 *bytes_transferred += bytes_xmit;
751 pages = 1;
754 XBZRLE_cache_lock();
756 current_addr = block->offset + offset;
758 if (block == last_sent_block) {
759 offset |= RAM_SAVE_FLAG_CONTINUE;
761 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
762 if (ret != RAM_SAVE_CONTROL_DELAYED) {
763 if (bytes_xmit > 0) {
764 acct_info.norm_pages++;
765 } else if (bytes_xmit == 0) {
766 acct_info.dup_pages++;
769 } else {
770 pages = save_zero_page(f, block, offset, p, bytes_transferred);
771 if (pages > 0) {
772 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
773 * page would be stale
775 xbzrle_cache_zero_page(current_addr);
776 } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
777 pages = save_xbzrle_page(f, &p, current_addr, block,
778 offset, last_stage, bytes_transferred);
779 if (!last_stage) {
780 /* Can't send this cached data async, since the cache page
781 * might get updated before it gets to the wire
783 send_async = false;
788 /* XBZRLE overflow or normal page */
789 if (pages == -1) {
790 *bytes_transferred += save_page_header(f, block,
791 offset | RAM_SAVE_FLAG_PAGE);
792 if (send_async) {
793 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
794 } else {
795 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
797 *bytes_transferred += TARGET_PAGE_SIZE;
798 pages = 1;
799 acct_info.norm_pages++;
802 XBZRLE_cache_unlock();
804 return pages;
807 static int do_compress_ram_page(CompressParam *param)
809 int bytes_sent, blen;
810 uint8_t *p;
811 RAMBlock *block = param->block;
812 ram_addr_t offset = param->offset;
814 p = block->host + (offset & TARGET_PAGE_MASK);
816 bytes_sent = save_page_header(param->file, block, offset |
817 RAM_SAVE_FLAG_COMPRESS_PAGE);
818 blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
819 migrate_compress_level());
820 bytes_sent += blen;
822 return bytes_sent;
825 static inline void start_compression(CompressParam *param)
827 param->done = false;
828 qemu_mutex_lock(&param->mutex);
829 param->start = true;
830 qemu_cond_signal(&param->cond);
831 qemu_mutex_unlock(&param->mutex);
834 static inline void start_decompression(DecompressParam *param)
836 qemu_mutex_lock(&param->mutex);
837 param->start = true;
838 qemu_cond_signal(&param->cond);
839 qemu_mutex_unlock(&param->mutex);
842 static uint64_t bytes_transferred;
844 static void flush_compressed_data(QEMUFile *f)
846 int idx, len, thread_count;
848 if (!migrate_use_compression()) {
849 return;
851 thread_count = migrate_compress_threads();
852 for (idx = 0; idx < thread_count; idx++) {
853 if (!comp_param[idx].done) {
854 qemu_mutex_lock(comp_done_lock);
855 while (!comp_param[idx].done && !quit_comp_thread) {
856 qemu_cond_wait(comp_done_cond, comp_done_lock);
858 qemu_mutex_unlock(comp_done_lock);
860 if (!quit_comp_thread) {
861 len = qemu_put_qemu_file(f, comp_param[idx].file);
862 bytes_transferred += len;
867 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
868 ram_addr_t offset)
870 param->block = block;
871 param->offset = offset;
874 static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
875 ram_addr_t offset,
876 uint64_t *bytes_transferred)
878 int idx, thread_count, bytes_xmit = -1, pages = -1;
880 thread_count = migrate_compress_threads();
881 qemu_mutex_lock(comp_done_lock);
882 while (true) {
883 for (idx = 0; idx < thread_count; idx++) {
884 if (comp_param[idx].done) {
885 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
886 set_compress_params(&comp_param[idx], block, offset);
887 start_compression(&comp_param[idx]);
888 pages = 1;
889 acct_info.norm_pages++;
890 *bytes_transferred += bytes_xmit;
891 break;
894 if (pages > 0) {
895 break;
896 } else {
897 qemu_cond_wait(comp_done_cond, comp_done_lock);
900 qemu_mutex_unlock(comp_done_lock);
902 return pages;
906 * ram_save_compressed_page: compress the given page and send it to the stream
908 * Returns: Number of pages written.
910 * @f: QEMUFile where to send the data
911 * @block: block that contains the page we want to send
912 * @offset: offset inside the block for the page
913 * @last_stage: if we are at the completion stage
914 * @bytes_transferred: increase it with the number of transferred bytes
916 static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
917 ram_addr_t offset, bool last_stage,
918 uint64_t *bytes_transferred)
920 int pages = -1;
921 uint64_t bytes_xmit;
922 uint8_t *p;
923 int ret;
925 p = block->host + offset;
927 bytes_xmit = 0;
928 ret = ram_control_save_page(f, block->offset,
929 offset, TARGET_PAGE_SIZE, &bytes_xmit);
930 if (bytes_xmit) {
931 *bytes_transferred += bytes_xmit;
932 pages = 1;
934 if (block == last_sent_block) {
935 offset |= RAM_SAVE_FLAG_CONTINUE;
937 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
938 if (ret != RAM_SAVE_CONTROL_DELAYED) {
939 if (bytes_xmit > 0) {
940 acct_info.norm_pages++;
941 } else if (bytes_xmit == 0) {
942 acct_info.dup_pages++;
945 } else {
946 /* When starting the process of a new block, the first page of
947 * the block should be sent out before other pages in the same
948 * block, and all the pages in the last block should have been sent
949 * out; keeping this order is important, because the 'cont' flag
950 * is used to avoid resending the block name.
952 if (block != last_sent_block) {
953 flush_compressed_data(f);
954 pages = save_zero_page(f, block, offset, p, bytes_transferred);
955 if (pages == -1) {
956 set_compress_params(&comp_param[0], block, offset);
957 /* Use the qemu thread to compress the data to make sure the
958 * first page is sent out before other pages
960 bytes_xmit = do_compress_ram_page(&comp_param[0]);
961 acct_info.norm_pages++;
962 qemu_put_qemu_file(f, comp_param[0].file);
963 *bytes_transferred += bytes_xmit;
964 pages = 1;
966 } else {
967 pages = save_zero_page(f, block, offset, p, bytes_transferred);
968 if (pages == -1) {
969 pages = compress_page_with_multi_thread(f, block, offset,
970 bytes_transferred);
975 return pages;
979 * Find the next dirty page and update any state associated with
980 * the search process.
982 * Returns: True if a page is found
984 * @f: Current migration stream.
985 * @pss: Data about the state of the current dirty page scan.
986 * @*again: Set to false if the search has scanned the whole of RAM
987 * *ram_addr_abs: Pointer into which to store the address of the dirty page
988 * within the global ram_addr space
990 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
991 bool *again, ram_addr_t *ram_addr_abs)
993 pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
994 ram_addr_abs);
995 if (pss->complete_round && pss->block == last_seen_block &&
996 pss->offset >= last_offset) {
998 * We've been once around the RAM and haven't found anything.
999 * Give up.
1001 *again = false;
1002 return false;
1004 if (pss->offset >= pss->block->used_length) {
1005 /* Didn't find anything in this RAM Block */
1006 pss->offset = 0;
1007 pss->block = QLIST_NEXT_RCU(pss->block, next);
1008 if (!pss->block) {
1009 /* Hit the end of the list */
1010 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1011 /* Flag that we've looped */
1012 pss->complete_round = true;
1013 ram_bulk_stage = false;
1014 if (migrate_use_xbzrle()) {
1015 /* If xbzrle is on, stop using the data compression at this
1016 * point. In theory, xbzrle can do better than compression.
1018 flush_compressed_data(f);
1019 compression_switch = false;
1022 /* Didn't find anything this time, but try again on the new block */
1023 *again = true;
1024 return false;
1025 } else {
1026 /* Can go around again, but... */
1027 *again = true;
1028 /* We've found something so probably don't need to */
1029 return true;
1034 * Helper for 'get_queued_page' - gets a page off the queue
1035 * ms: MigrationState in
1036 * *offset: Used to return the offset within the RAMBlock
1037 * ram_addr_abs: global offset in the dirty/sent bitmaps
1039 * Returns: block (or NULL if none available)
1041 static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1042 ram_addr_t *ram_addr_abs)
1044 RAMBlock *block = NULL;
1046 qemu_mutex_lock(&ms->src_page_req_mutex);
1047 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1048 struct MigrationSrcPageRequest *entry =
1049 QSIMPLEQ_FIRST(&ms->src_page_requests);
1050 block = entry->rb;
1051 *offset = entry->offset;
1052 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1053 TARGET_PAGE_MASK;
1055 if (entry->len > TARGET_PAGE_SIZE) {
1056 entry->len -= TARGET_PAGE_SIZE;
1057 entry->offset += TARGET_PAGE_SIZE;
1058 } else {
1059 memory_region_unref(block->mr);
1060 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1061 g_free(entry);
1064 qemu_mutex_unlock(&ms->src_page_req_mutex);
1066 return block;
1070 * Unqueue a page from the queue fed by postcopy page requests; skips pages
1071 * that are already sent (!dirty)
1073 * ms: MigrationState in
1074 * pss: PageSearchStatus structure updated with found block/offset
1075 * ram_addr_abs: global offset in the dirty/sent bitmaps
1077 * Returns: true if a queued page is found
1079 static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1080 ram_addr_t *ram_addr_abs)
1082 RAMBlock *block;
1083 ram_addr_t offset;
1084 bool dirty;
1086 do {
1087 block = unqueue_page(ms, &offset, ram_addr_abs);
1089 * We're sending this page, and since it's postcopy nothing else
1090 * will dirty it, and we must make sure it doesn't get sent again
1091 * even if this queue request was received after the background
1092 * search already sent it.
1094 if (block) {
1095 unsigned long *bitmap;
1096 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1097 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1098 if (!dirty) {
1099 trace_get_queued_page_not_dirty(
1100 block->idstr, (uint64_t)offset,
1101 (uint64_t)*ram_addr_abs,
1102 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1103 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1104 } else {
1105 trace_get_queued_page(block->idstr,
1106 (uint64_t)offset,
1107 (uint64_t)*ram_addr_abs);
1111 } while (block && !dirty);
1113 if (block) {
1115 * As soon as we start servicing pages out of order, then we have
1116 * to kill the bulk stage, since the bulk stage assumes
1117 * in (migration_bitmap_find_and_reset_dirty) that every page is
1118 * dirty; that's no longer true.
1120 ram_bulk_stage = false;
1123 * We want the background search to continue from the queued page
1124 * since the guest is likely to want other pages near to the page
1125 * it just requested.
1127 pss->block = block;
1128 pss->offset = offset;
1131 return !!block;
1135 * flush_page_queue: Flush any remaining pages in the ram request queue
1136 * it should be empty at the end anyway, but in error cases there may be
1137 * some left.
1139 * ms: MigrationState
1141 void flush_page_queue(MigrationState *ms)
1143 struct MigrationSrcPageRequest *mspr, *next_mspr;
1144 /* This queue generally should be empty - but in the case of a failed
1145 * migration might have some droppings in.
1147 rcu_read_lock();
1148 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1149 memory_region_unref(mspr->rb->mr);
1150 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1151 g_free(mspr);
1153 rcu_read_unlock();
1157 * Queue the pages for transmission, e.g. a request from postcopy destination
1158 * ms: MigrationStatus in which the queue is held
1159 * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1160 * start: Offset from the start of the RAMBlock
1161 * len: Length (in bytes) to send
1162 * Return: 0 on success
1164 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1165 ram_addr_t start, ram_addr_t len)
1167 RAMBlock *ramblock;
1169 rcu_read_lock();
1170 if (!rbname) {
1171 /* Reuse last RAMBlock */
1172 ramblock = ms->last_req_rb;
1174 if (!ramblock) {
1176 * Shouldn't happen, we can't reuse the last RAMBlock if
1177 * it's the 1st request.
1179 error_report("ram_save_queue_pages no previous block");
1180 goto err;
1182 } else {
1183 ramblock = qemu_ram_block_by_name(rbname);
1185 if (!ramblock) {
1186 /* We shouldn't be asked for a non-existent RAMBlock */
1187 error_report("ram_save_queue_pages no block '%s'", rbname);
1188 goto err;
1190 ms->last_req_rb = ramblock;
1192 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1193 if (start + len > ramblock->used_length) {
1194 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1195 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1196 __func__, start, len, ramblock->used_length);
1197 goto err;
1200 struct MigrationSrcPageRequest *new_entry =
1201 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1202 new_entry->rb = ramblock;
1203 new_entry->offset = start;
1204 new_entry->len = len;
1206 memory_region_ref(ramblock->mr);
1207 qemu_mutex_lock(&ms->src_page_req_mutex);
1208 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1209 qemu_mutex_unlock(&ms->src_page_req_mutex);
1210 rcu_read_unlock();
1212 return 0;
1214 err:
1215 rcu_read_unlock();
1216 return -1;
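/* Usage sketch (block name assumed, not taken from this file): when the
 * postcopy destination faults on a missing page it sends a page request
 * back to the source, which ends up calling something like
 *     ram_save_queue_pages(ms, "pc.ram", start, TARGET_PAGE_SIZE);
 * get_queued_page() then services that request ahead of the background
 * dirty-bitmap scan.
 */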
1220 * ram_save_target_page: Save one target page
1223 * @f: QEMUFile where to send the data
1224 * @block: pointer to block that contains the page we want to send
1225 * @offset: offset inside the block for the page;
1226 * @last_stage: if we are at the completion stage
1227 * @bytes_transferred: increase it with the number of transferred bytes
1228 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1230 * Returns: Number of pages written.
1232 static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
1233 RAMBlock *block, ram_addr_t offset,
1234 bool last_stage,
1235 uint64_t *bytes_transferred,
1236 ram_addr_t dirty_ram_abs)
1238 int res = 0;
1240 /* Check if the page is dirty and, if it is, send it */
1241 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1242 unsigned long *unsentmap;
1243 if (compression_switch && migrate_use_compression()) {
1244 res = ram_save_compressed_page(f, block, offset,
1245 last_stage,
1246 bytes_transferred);
1247 } else {
1248 res = ram_save_page(f, block, offset, last_stage,
1249 bytes_transferred);
1252 if (res < 0) {
1253 return res;
1255 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1256 if (unsentmap) {
1257 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1259 /* Only update last_sent_block if a block was actually sent; xbzrle
1260 * might have decided the page was identical so didn't bother writing
1261 * to the stream.
1263 if (res > 0) {
1264 last_sent_block = block;
1268 return res;
1272 * ram_save_host_page: Starting at *offset send pages up to the end
1273 * of the current host page. It's valid for the initial
1274 * offset to point into the middle of a host page
1275 * in which case the remainder of the hostpage is sent.
1276 * Only dirty target pages are sent.
1278 * Returns: Number of pages written.
1280 * @f: QEMUFile where to send the data
1281 * @block: pointer to block that contains the page we want to send
1282 * @offset: offset inside the block for the page; updated to last target page
1283 * sent
1284 * @last_stage: if we are at the completion stage
1285 * @bytes_transferred: increase it with the number of transferred bytes
1286 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1288 static int ram_save_host_page(MigrationState *ms, QEMUFile *f, RAMBlock *block,
1289 ram_addr_t *offset, bool last_stage,
1290 uint64_t *bytes_transferred,
1291 ram_addr_t dirty_ram_abs)
1293 int tmppages, pages = 0;
1294 do {
1295 tmppages = ram_save_target_page(ms, f, block, *offset, last_stage,
1296 bytes_transferred, dirty_ram_abs);
1297 if (tmppages < 0) {
1298 return tmppages;
1301 pages += tmppages;
1302 *offset += TARGET_PAGE_SIZE;
1303 dirty_ram_abs += TARGET_PAGE_SIZE;
1304 } while (*offset & (qemu_host_page_size - 1));
1306 /* The offset we leave with is the last one we looked at */
1307 *offset -= TARGET_PAGE_SIZE;
1308 return pages;
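/* Example, assuming a 64K host page size with 4K target pages: one call can
 * send up to 16 target pages, stopping when *offset reaches the next host
 * page boundary; *offset is then stepped back one target page so the caller
 * records the last page actually examined.
 */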
1312 * ram_find_and_save_block: Finds a dirty page and sends it to f
1314 * Called within an RCU critical section.
1316 * Returns: The number of pages written
1317 * 0 means no dirty pages
1319 * @f: QEMUFile where to send the data
1320 * @last_stage: if we are at the completion stage
1321 * @bytes_transferred: increase it with the number of transferred bytes
1323 * On systems where host-page-size > target-page-size it will send all the
1324 * pages in a host page that are dirty.
1327 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1328 uint64_t *bytes_transferred)
1330 PageSearchStatus pss;
1331 MigrationState *ms = migrate_get_current();
1332 int pages = 0;
1333 bool again, found;
1334 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1335 ram_addr_t space */
1337 pss.block = last_seen_block;
1338 pss.offset = last_offset;
1339 pss.complete_round = false;
1341 if (!pss.block) {
1342 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1345 do {
1346 again = true;
1347 found = get_queued_page(ms, &pss, &dirty_ram_abs);
1349 if (!found) {
1350 /* priority queue empty, so just search for something dirty */
1351 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1354 if (found) {
1355 pages = ram_save_host_page(ms, f, pss.block, &pss.offset,
1356 last_stage, bytes_transferred,
1357 dirty_ram_abs);
1359 } while (!pages && again);
1361 last_seen_block = pss.block;
1362 last_offset = pss.offset;
1364 return pages;
1367 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1369 uint64_t pages = size / TARGET_PAGE_SIZE;
1370 if (zero) {
1371 acct_info.dup_pages += pages;
1372 } else {
1373 acct_info.norm_pages += pages;
1374 bytes_transferred += size;
1375 qemu_update_position(f, size);
1379 static ram_addr_t ram_save_remaining(void)
1381 return migration_dirty_pages;
1384 uint64_t ram_bytes_remaining(void)
1386 return ram_save_remaining() * TARGET_PAGE_SIZE;
1389 uint64_t ram_bytes_transferred(void)
1391 return bytes_transferred;
1394 uint64_t ram_bytes_total(void)
1396 RAMBlock *block;
1397 uint64_t total = 0;
1399 rcu_read_lock();
1400 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1401 total += block->used_length;
1402 rcu_read_unlock();
1403 return total;
1406 void free_xbzrle_decoded_buf(void)
1408 g_free(xbzrle_decoded_buf);
1409 xbzrle_decoded_buf = NULL;
1412 static void migration_bitmap_free(struct BitmapRcu *bmap)
1414 g_free(bmap->bmap);
1415 g_free(bmap->unsentmap);
1416 g_free(bmap);
1419 static void ram_migration_cleanup(void *opaque)
1421 /* caller has held the iothread lock or is in a bh, so there is
1422 * no writing race against this migration_bitmap
1424 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1425 atomic_rcu_set(&migration_bitmap_rcu, NULL);
1426 if (bitmap) {
1427 memory_global_dirty_log_stop();
1428 call_rcu(bitmap, migration_bitmap_free, rcu);
1431 XBZRLE_cache_lock();
1432 if (XBZRLE.cache) {
1433 cache_fini(XBZRLE.cache);
1434 g_free(XBZRLE.encoded_buf);
1435 g_free(XBZRLE.current_buf);
1436 XBZRLE.cache = NULL;
1437 XBZRLE.encoded_buf = NULL;
1438 XBZRLE.current_buf = NULL;
1440 XBZRLE_cache_unlock();
1443 static void reset_ram_globals(void)
1445 last_seen_block = NULL;
1446 last_sent_block = NULL;
1447 last_offset = 0;
1448 last_version = ram_list.version;
1449 ram_bulk_stage = true;
1452 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1454 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1456 /* called in qemu main thread, so there is
1457 * no writing race against this migration_bitmap
1459 if (migration_bitmap_rcu) {
1460 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1461 bitmap = g_new(struct BitmapRcu, 1);
1462 bitmap->bmap = bitmap_new(new);
1464 /* prevent migration_bitmap content from having bits set
1465 * by migration_bitmap_sync_range() at the same time.
1466 * It is safe for migration if bits in migration_bitmap are only
1467 * cleared at the same time.
1469 qemu_mutex_lock(&migration_bitmap_mutex);
1470 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1471 bitmap_set(bitmap->bmap, old, new - old);
1473 /* We don't have a way to safely extend the sentmap
1474 * with RCU; so mark it as missing, entry to postcopy
1475 * will fail.
1477 bitmap->unsentmap = NULL;
1479 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1480 qemu_mutex_unlock(&migration_bitmap_mutex);
1481 migration_dirty_pages += new - old;
1482 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1487 * 'expected' is the value you expect the bitmap mostly to be full
1488 * of; it won't bother printing lines that are all this value.
1489 * If 'todump' is null the migration bitmap is dumped.
1491 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1493 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1495 int64_t cur;
1496 int64_t linelen = 128;
1497 char linebuf[129];
1499 if (!todump) {
1500 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1503 for (cur = 0; cur < ram_pages; cur += linelen) {
1504 int64_t curb;
1505 bool found = false;
1507 * Last line; catch the case where the line length
1508 * is longer than remaining ram
1510 if (cur + linelen > ram_pages) {
1511 linelen = ram_pages - cur;
1513 for (curb = 0; curb < linelen; curb++) {
1514 bool thisbit = test_bit(cur + curb, todump);
1515 linebuf[curb] = thisbit ? '1' : '.';
1516 found = found || (thisbit != expected);
1518 if (found) {
1519 linebuf[curb] = '\0';
1520 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1525 /* **** functions for postcopy ***** */
1528 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1529 * Note: At this point the 'unsentmap' is the processed bitmap combined
1530 * with the dirtymap; so a '1' means it's either dirty or unsent.
1531 * start,length: Indexes into the bitmap for the first bit
1532 * representing the named block and length in target-pages
1534 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1535 PostcopyDiscardState *pds,
1536 unsigned long start,
1537 unsigned long length)
1539 unsigned long end = start + length; /* one after the end */
1540 unsigned long current;
1541 unsigned long *unsentmap;
1543 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1544 for (current = start; current < end; ) {
1545 unsigned long one = find_next_bit(unsentmap, end, current);
1547 if (one <= end) {
1548 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1549 unsigned long discard_length;
1551 if (zero >= end) {
1552 discard_length = end - one;
1553 } else {
1554 discard_length = zero - one;
1556 postcopy_discard_send_range(ms, pds, one, discard_length);
1557 current = one + discard_length;
1558 } else {
1559 current = one;
1563 return 0;
1567 * Utility for the outgoing postcopy code.
1568 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1569 * passing it bitmap indexes and name.
1570 * Returns: 0 on success
1571 * (qemu_ram_foreach_block ends up passing unscaled lengths
1572 * which would mean postcopy code would have to deal with target page)
1574 static int postcopy_each_ram_send_discard(MigrationState *ms)
1576 struct RAMBlock *block;
1577 int ret;
1579 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1580 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1581 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1582 first,
1583 block->idstr);
1586 * Postcopy sends chunks of bitmap over the wire, but it
1587 * just needs indexes at this point, avoids it having
1588 * target page specific code.
1590 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1591 block->used_length >> TARGET_PAGE_BITS);
1592 postcopy_discard_send_finish(ms, pds);
1593 if (ret) {
1594 return ret;
1598 return 0;
1602 * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
1603 * the two bitmaps, that are similar, but one is inverted.
1605 * We search for runs of target-pages that don't start or end on a
1606 * host page boundary;
1607 * unsent_pass=true: Cleans up partially unsent host pages by searching
1608 * the unsentmap
1609 * unsent_pass=false: Cleans up partially dirty host pages by searching
1610 * the main migration bitmap
1613 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1614 RAMBlock *block,
1615 PostcopyDiscardState *pds)
1617 unsigned long *bitmap;
1618 unsigned long *unsentmap;
1619 unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
1620 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1621 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1622 unsigned long last = first + (len - 1);
1623 unsigned long run_start;
1625 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1626 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1628 if (unsent_pass) {
1629 /* Find a sent page */
1630 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1631 } else {
1632 /* Find a dirty page */
1633 run_start = find_next_bit(bitmap, last + 1, first);
1636 while (run_start <= last) {
1637 bool do_fixup = false;
1638 unsigned long fixup_start_addr;
1639 unsigned long host_offset;
1642 * If the start of this run of pages is in the middle of a host
1643 * page, then we need to fixup this host page.
1645 host_offset = run_start % host_ratio;
1646 if (host_offset) {
1647 do_fixup = true;
1648 run_start -= host_offset;
1649 fixup_start_addr = run_start;
1650 /* For the next pass */
1651 run_start = run_start + host_ratio;
1652 } else {
1653 /* Find the end of this run */
1654 unsigned long run_end;
1655 if (unsent_pass) {
1656 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1657 } else {
1658 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1661 * If the end isn't at the start of a host page, then the
1662 * run doesn't finish at the end of a host page
1663 * and we need to discard.
1665 host_offset = run_end % host_ratio;
1666 if (host_offset) {
1667 do_fixup = true;
1668 fixup_start_addr = run_end - host_offset;
1670 * This host page has gone, the next loop iteration starts
1671 * from after the fixup
1673 run_start = fixup_start_addr + host_ratio;
1674 } else {
1676 * No discards on this iteration, next loop starts from
1677 * next sent/dirty page
1679 run_start = run_end + 1;
1683 if (do_fixup) {
1684 unsigned long page;
1686 /* Tell the destination to discard this page */
1687 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1688 /* For the unsent_pass we:
1689 * discard partially sent pages
1690 * For the !unsent_pass (dirty) we:
1691 * discard partially dirty pages that were sent
1692 * (any partially sent pages were already discarded
1693 * by the previous unsent_pass)
1695 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1696 host_ratio);
1699 /* Clean up the bitmap */
1700 for (page = fixup_start_addr;
1701 page < fixup_start_addr + host_ratio; page++) {
1702 /* All pages in this host page are now not sent */
1703 set_bit(page, unsentmap);
1706 * Remark them as dirty, updating the count for any pages
1707 * that weren't previously dirty.
1709 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1713 if (unsent_pass) {
1714 /* Find the next sent page for the next iteration */
1715 run_start = find_next_zero_bit(unsentmap, last + 1,
1716 run_start);
1717 } else {
1718 /* Find the next dirty page for the next iteration */
1719 run_start = find_next_bit(bitmap, last + 1, run_start);
1725 * Utility for the outgoing postcopy code.
1727 * Discard any partially sent host-page size chunks, mark any partially
1728 * dirty host-page size chunks as all dirty.
1730 * Returns: 0 on success
1732 static int postcopy_chunk_hostpages(MigrationState *ms)
1734 struct RAMBlock *block;
1736 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1737 /* Easy case - TPS==HPS - nothing to be done */
1738 return 0;
1741 /* Easiest way to make sure we don't resume in the middle of a host-page */
1742 last_seen_block = NULL;
1743 last_sent_block = NULL;
1744 last_offset = 0;
1746 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1747 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1749 PostcopyDiscardState *pds =
1750 postcopy_discard_send_init(ms, first, block->idstr);
1752 /* First pass: Discard all partially sent host pages */
1753 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1755 * Second pass: Ensure that all partially dirty host pages are made
1756 * fully dirty.
1758 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1760 postcopy_discard_send_finish(ms, pds);
1761 } /* ram_list loop */
1763 return 0;
1767 * Transmit the set of pages to be discarded after precopy to the target
1768 * these are pages that:
1769 * a) Have been previously transmitted but are now dirty again
1770 * b) Pages that have never been transmitted, this ensures that
1771 * any pages on the destination that have been mapped by background
1772 * tasks get discarded (transparent huge pages is the specific concern)
1773 * Hopefully this is pretty sparse
1775 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1777 int ret;
1778 unsigned long *bitmap, *unsentmap;
1780 rcu_read_lock();
1782 /* This should be our last sync, the src is now paused */
1783 migration_bitmap_sync();
1785 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1786 if (!unsentmap) {
1787 /* We don't have a safe way to resize the sentmap, so
1788 * if the bitmap was resized it will be NULL at this
1789 * point.
1791 error_report("migration ram resized during precopy phase");
1792 rcu_read_unlock();
1793 return -EINVAL;
1796 /* Deal with TPS != HPS */
1797 ret = postcopy_chunk_hostpages(ms);
1798 if (ret) {
1799 rcu_read_unlock();
1800 return ret;
1804 * Update the unsentmap to be unsentmap = unsentmap | dirty
1806 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1807 bitmap_or(unsentmap, unsentmap, bitmap,
1808 last_ram_offset() >> TARGET_PAGE_BITS);
1811 trace_ram_postcopy_send_discard_bitmap();
1812 #ifdef DEBUG_POSTCOPY
1813 ram_debug_dump_bitmap(unsentmap, true);
1814 #endif
1816 ret = postcopy_each_ram_send_discard(ms);
1817 rcu_read_unlock();
1819 return ret;
1823 * At the start of the postcopy phase of migration, any now-dirty
1824 * precopied pages are discarded.
1826 * start, length describe a byte address range within the RAMBlock
1828 * Returns 0 on success.
1830 int ram_discard_range(MigrationIncomingState *mis,
1831 const char *block_name,
1832 uint64_t start, size_t length)
1834 int ret = -1;
1836 rcu_read_lock();
1837 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1839 if (!rb) {
1840 error_report("ram_discard_range: Failed to find block '%s'",
1841 block_name);
1842 goto err;
1845 uint8_t *host_startaddr = rb->host + start;
1847 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1848 error_report("ram_discard_range: Unaligned start address: %p",
1849 host_startaddr);
1850 goto err;
1853 if ((start + length) <= rb->used_length) {
1854 uint8_t *host_endaddr = host_startaddr + length;
1855 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1856 error_report("ram_discard_range: Unaligned end address: %p",
1857 host_endaddr);
1858 goto err;
1860 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1861 } else {
1862 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
1863 "/%zx/" RAM_ADDR_FMT")",
1864 block_name, start, length, rb->used_length);
1867 err:
1868 rcu_read_unlock();
1870 return ret;
1874 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1875 * a long-running RCU critical section. When rcu-reclaims in the code
1876 * start to become numerous it will be necessary to reduce the
1877 * granularity of these critical sections.
1880 static int ram_save_setup(QEMUFile *f, void *opaque)
1882 RAMBlock *block;
1883 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1885 dirty_rate_high_cnt = 0;
1886 bitmap_sync_count = 0;
1887 migration_bitmap_sync_init();
1888 qemu_mutex_init(&migration_bitmap_mutex);
1890 if (migrate_use_xbzrle()) {
1891 XBZRLE_cache_lock();
1892 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1893 TARGET_PAGE_SIZE,
1894 TARGET_PAGE_SIZE);
1895 if (!XBZRLE.cache) {
1896 XBZRLE_cache_unlock();
1897 error_report("Error creating cache");
1898 return -1;
1900 XBZRLE_cache_unlock();
1902 /* We prefer not to abort if there is no memory */
1903 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1904 if (!XBZRLE.encoded_buf) {
1905 error_report("Error allocating encoded_buf");
1906 return -1;
1909 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1910 if (!XBZRLE.current_buf) {
1911 error_report("Error allocating current_buf");
1912 g_free(XBZRLE.encoded_buf);
1913 XBZRLE.encoded_buf = NULL;
1914 return -1;
1917 acct_clear();
1920 /* iothread lock needed for ram_list.dirty_memory[] */
1921 qemu_mutex_lock_iothread();
1922 qemu_mutex_lock_ramlist();
1923 rcu_read_lock();
1924 bytes_transferred = 0;
1925 reset_ram_globals();
1927 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1928 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
1929 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1930 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1932 if (migrate_postcopy_ram()) {
1933 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1934 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1938 * Count the total number of pages used by ram blocks not including any
1939 * gaps due to alignment or unplugs.
1941 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1943 memory_global_dirty_log_start();
1944 migration_bitmap_sync();
1945 qemu_mutex_unlock_ramlist();
1946 qemu_mutex_unlock_iothread();
1948 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1950 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1951 qemu_put_byte(f, strlen(block->idstr));
1952 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1953 qemu_put_be64(f, block->used_length);
1956 rcu_read_unlock();
1958 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1959 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1961 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1963 return 0;
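/* The setup section written above is therefore: a be64 of total RAM size
 * with RAM_SAVE_FLAG_MEM_SIZE set, then for every RAMBlock a one-byte idstr
 * length, the idstr itself and a be64 used_length, and finally a bare
 * RAM_SAVE_FLAG_EOS marker.
 */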
1966 static int ram_save_iterate(QEMUFile *f, void *opaque)
1968 int ret;
1969 int i;
1970 int64_t t0;
1971 int pages_sent = 0;
1973 rcu_read_lock();
1974 if (ram_list.version != last_version) {
1975 reset_ram_globals();
1978 /* Read version before ram_list.blocks */
1979 smp_rmb();
1981 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1983 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1984 i = 0;
1985 while ((ret = qemu_file_rate_limit(f)) == 0) {
1986 int pages;
1988 pages = ram_find_and_save_block(f, false, &bytes_transferred);
1989 /* no more pages to send */
1990 if (pages == 0) {
1991 break;
1993 pages_sent += pages;
1994 acct_info.iterations++;
1996 /* we want to check in the 1st loop, just in case it was the 1st time
1997 and we had to sync the dirty bitmap.
1998 qemu_clock_get_ns() is a bit expensive, so we only check once every
1999 few iterations
2001 if ((i & 63) == 0) {
2002 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2003 if (t1 > MAX_WAIT) {
2004 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
2005 t1, i);
2006 break;
2009 i++;
2011 flush_compressed_data(f);
2012 rcu_read_unlock();
2015 * Must occur before EOS (or any QEMUFile operation)
2016 * because of RDMA protocol.
2018 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2020 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2021 bytes_transferred += 8;
2023 ret = qemu_file_get_error(f);
2024 if (ret < 0) {
2025 return ret;
2028 return pages_sent;
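/*
 * ram_save_complete: the final pass, run with the guest stopped.
 *
 * Syncs the dirty bitmap one last time (unless we are in postcopy, where
 * the remaining pages are served on demand) and flushes every remaining
 * dirty page regardless of any rate limit.
 */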
2031 /* Called with iothread lock */
2032 static int ram_save_complete(QEMUFile *f, void *opaque)
2034 rcu_read_lock();
2036 if (!migration_in_postcopy(migrate_get_current())) {
2037 migration_bitmap_sync();
2040 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2042 /* try transferring iterative blocks of memory */
2044 /* flush all remaining blocks regardless of rate limiting */
2045 while (true) {
2046 int pages;
2048 pages = ram_find_and_save_block(f, true, &bytes_transferred);
2050 /* no more blocks to send */
2050 if (pages == 0) {
2051 break;
2055 flush_compressed_data(f);
2056 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2058 rcu_read_unlock();
2060 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2062 return 0;
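/*
 * ram_save_pending: report how much RAM is still dirty.
 *
 * If the estimate is below max_size (and we are not in postcopy), the
 * dirty bitmap is re-synced under the iothread lock to refresh the
 * count.  All RAM is postcopiable, so the remainder is accounted in
 * *postcopiable_pending.
 */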
2065 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2066 uint64_t *non_postcopiable_pending,
2067 uint64_t *postcopiable_pending)
2069 uint64_t remaining_size;
2071 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2073 if (!migration_in_postcopy(migrate_get_current()) &&
2074 remaining_size < max_size) {
2075 qemu_mutex_lock_iothread();
2076 rcu_read_lock();
2077 migration_bitmap_sync();
2078 rcu_read_unlock();
2079 qemu_mutex_unlock_iothread();
2080 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2083 /* We can do postcopy, and all the data is postcopiable */
2084 *postcopiable_pending += remaining_size;
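/*
 * load_xbzrle: decode one XBZRLE-encoded page from the stream.
 *
 * Reads the one-byte encoding flag and 16-bit encoded length, pulls the
 * encoded data into a scratch buffer and decodes it on top of the
 * existing contents of 'host'.  Returns 0 on success, -1 on a malformed
 * page.
 */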
2087 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2089 unsigned int xh_len;
2090 int xh_flags;
2092 if (!xbzrle_decoded_buf) {
2093 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2096 /* extract RLE header */
2097 xh_flags = qemu_get_byte(f);
2098 xh_len = qemu_get_be16(f);
2100 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2101 error_report("Failed to load XBZRLE page - wrong compression!");
2102 return -1;
2105 if (xh_len > TARGET_PAGE_SIZE) {
2106 error_report("Failed to load XBZRLE page - len overflow!");
2107 return -1;
2109 /* load data and decode */
2110 qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
2112 /* decode RLE */
2113 if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
2114 TARGET_PAGE_SIZE) == -1) {
2115 error_report("Failed to load XBZRLE page - decode error!");
2116 return -1;
2119 return 0;
2122 /* Must be called from within an RCU critical section.
2123 * Returns a pointer from within the RCU-protected ram_list.
2126 * Read a RAMBlock ID from the stream f, find the host address of the
2127 * start of that block and add 'offset' to it.
2129 * f: Stream to read from
2130 * offset: Offset within the block
2131 * flags: Page flags (mostly to see if it's a continuation of the previous block)
2133 static inline void *host_from_stream_offset(QEMUFile *f,
2134 ram_addr_t offset,
2135 int flags)
2137 static RAMBlock *block = NULL;
2138 char id[256];
2139 uint8_t len;
2141 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2142 if (!block || block->max_length <= offset) {
2143 error_report("Ack, bad migration stream!");
2144 return NULL;
2147 return block->host + offset;
2150 len = qemu_get_byte(f);
2151 qemu_get_buffer(f, (uint8_t *)id, len);
2152 id[len] = 0;
2154 block = qemu_ram_block_by_name(id);
2155 if (block && block->max_length > offset) {
2156 return block->host + offset;
2159 error_report("Can't find block %s", id);
2160 return NULL;
2164 * If a page (or a whole RDMA chunk) has been
2165 * determined to be zero, then zap it.
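/* The memset is skipped when the destination range is already zero,
 * which avoids writing to (and typically faulting in) pages that are
 * still untouched on the destination. */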
2167 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2169 if (ch != 0 || !is_zero_range(host, size)) {
2170 memset(host, ch, size);
2174 static void *do_data_decompress(void *opaque)
2176 DecompressParam *param = opaque;
2177 unsigned long pagesize;
2179 while (!quit_decomp_thread) {
2180 qemu_mutex_lock(&param->mutex);
2181 while (!param->start && !quit_decomp_thread) {
2182 qemu_cond_wait(&param->cond, &param->mutex);
2183 pagesize = TARGET_PAGE_SIZE;
2184 if (!quit_decomp_thread) {
2185 /* uncompress() may fail in some cases, especially when the page
2186 * is dirtied while it is being compressed.  That is not a
2187 * problem, because the dirty page will be retransmitted and
2188 * uncompress() won't corrupt the data in other pages.
2190 uncompress((Bytef *)param->des, &pagesize,
2191 (const Bytef *)param->compbuf, param->len);
2193 param->start = false;
2195 qemu_mutex_unlock(&param->mutex);
2198 return NULL;
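/*
 * migrate_decompress_threads_create: spawn the decompression workers.
 *
 * One joinable thread per migrate_decompress_threads(), each with its
 * own mutex/condvar and a compressBound(TARGET_PAGE_SIZE)-sized buffer
 * that decompress_data_with_multi_threads() hands work into.
 */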
2201 void migrate_decompress_threads_create(void)
2203 int i, thread_count;
2205 thread_count = migrate_decompress_threads();
2206 decompress_threads = g_new0(QemuThread, thread_count);
2207 decomp_param = g_new0(DecompressParam, thread_count);
2208 compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2209 quit_decomp_thread = false;
2210 for (i = 0; i < thread_count; i++) {
2211 qemu_mutex_init(&decomp_param[i].mutex);
2212 qemu_cond_init(&decomp_param[i].cond);
2213 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2214 qemu_thread_create(decompress_threads + i, "decompress",
2215 do_data_decompress, decomp_param + i,
2216 QEMU_THREAD_JOINABLE);
2220 void migrate_decompress_threads_join(void)
2222 int i, thread_count;
2224 quit_decomp_thread = true;
2225 thread_count = migrate_decompress_threads();
2226 for (i = 0; i < thread_count; i++) {
2227 qemu_mutex_lock(&decomp_param[i].mutex);
2228 qemu_cond_signal(&decomp_param[i].cond);
2229 qemu_mutex_unlock(&decomp_param[i].mutex);
2231 for (i = 0; i < thread_count; i++) {
2232 qemu_thread_join(decompress_threads + i);
2233 qemu_mutex_destroy(&decomp_param[i].mutex);
2234 qemu_cond_destroy(&decomp_param[i].cond);
2235 g_free(decomp_param[i].compbuf);
2237 g_free(decompress_threads);
2238 g_free(decomp_param);
2239 g_free(compressed_data_buf);
2240 decompress_threads = NULL;
2241 decomp_param = NULL;
2242 compressed_data_buf = NULL;
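/*
 * decompress_data_with_multi_threads: hand one compressed page to an
 * idle worker.  Busy-polls the worker array until a thread with
 * param->start still false is found, copies the compressed data into
 * its buffer and kicks it via start_decompression().
 */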
2245 static void decompress_data_with_multi_threads(uint8_t *compbuf,
2246 void *host, int len)
2248 int idx, thread_count;
2250 thread_count = migrate_decompress_threads();
2251 while (true) {
2252 for (idx = 0; idx < thread_count; idx++) {
2253 if (!decomp_param[idx].start) {
2254 memcpy(decomp_param[idx].compbuf, compbuf, len);
2255 decomp_param[idx].des = host;
2256 decomp_param[idx].len = len;
2257 start_decompression(&decomp_param[idx]);
2258 break;
2261 if (idx < thread_count) {
2262 break;
2268 * Allocate data structures etc. needed by incoming migration with postcopy-ram;
2269 * postcopy-ram's similarly named postcopy_ram_incoming_init() does the work.
2271 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2273 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2275 return postcopy_ram_incoming_init(mis, ram_pages);
2279 * Called in postcopy mode by ram_load().
2280 * rcu_read_lock is taken prior to this being called.
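/* Incoming target pages are accumulated in a temporary host-sized page
 * and only 'placed' into guest memory once the last target page of that
 * host page has been read, so each host page appears atomically on the
 * destination. */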
2282 static int ram_load_postcopy(QEMUFile *f)
2284 int flags = 0, ret = 0;
2285 bool place_needed = false;
2286 bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2287 MigrationIncomingState *mis = migration_incoming_get_current();
2288 /* Temporary page that is later 'placed' */
2289 void *postcopy_host_page = postcopy_get_tmp_page(mis);
2290 void *last_host = NULL;
2291 bool all_zero = false;
2293 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2294 ram_addr_t addr;
2295 void *host = NULL;
2296 void *page_buffer = NULL;
2297 void *place_source = NULL;
2298 uint8_t ch;
2300 addr = qemu_get_be64(f);
2301 flags = addr & ~TARGET_PAGE_MASK;
2302 addr &= TARGET_PAGE_MASK;
2304 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2305 place_needed = false;
2306 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2307 host = host_from_stream_offset(f, addr, flags);
2308 if (!host) {
2309 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2310 ret = -EINVAL;
2311 break;
2313 page_buffer = host;
2315 * Postcopy requires that we place whole host pages atomically.
2316 * To make it atomic, the data is read into a temporary page
2317 * that's moved into place later.
2318 * The migration protocol uses, possibly smaller, target pages;
2319 * however, the source ensures it always sends all the components
2320 * of a host page in order.
2322 page_buffer = postcopy_host_page +
2323 ((uintptr_t)host & ~qemu_host_page_mask);
2324 /* If all target pages are zero then we can optimise the placement */
2325 if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2326 all_zero = true;
2327 } else {
2328 /* not the first target page within the host page */
2329 if (host != (last_host + TARGET_PAGE_SIZE)) {
2330 error_report("Non-sequential target page %p/%p\n",
2331 host, last_host);
2332 ret = -EINVAL;
2333 break;
2339 * If it's the last part of a host page then we place the host
2340 * page
2342 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2343 ~qemu_host_page_mask) == 0;
2344 place_source = postcopy_host_page;
2346 last_host = host;
2348 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2349 case RAM_SAVE_FLAG_COMPRESS:
2350 ch = qemu_get_byte(f);
2351 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2352 if (ch) {
2353 all_zero = false;
2355 break;
2357 case RAM_SAVE_FLAG_PAGE:
2358 all_zero = false;
2359 if (!place_needed || !matching_page_sizes) {
2360 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2361 } else {
2362 /* Avoid the intermediate copy out of the QEMUFile buffer: the page
2363 * will be copied again when it is placed, and reading in place is
2364 * only possible when the read is done in one go (matching page sizes).
2366 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2367 TARGET_PAGE_SIZE);
2369 break;
2370 case RAM_SAVE_FLAG_EOS:
2371 /* normal exit */
2372 break;
2373 default:
2374 error_report("Unknown combination of migration flags: %#x"
2375 " (postcopy mode)", flags);
2376 ret = -EINVAL;
2379 if (place_needed) {
2380 /* This gets called at the last target page in the host page */
2381 if (all_zero) {
2382 ret = postcopy_place_page_zero(mis,
2383 host + TARGET_PAGE_SIZE -
2384 qemu_host_page_size);
2385 } else {
2386 ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
2387 qemu_host_page_size,
2388 place_source);
2391 if (!ret) {
2392 ret = qemu_file_get_error(f);
2396 return ret;
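/*
 * ram_load: entry point for the incoming RAM section.
 *
 * Only stream version 4 is accepted.  Once the destination has entered
 * the postcopy listening phase, the work is delegated to
 * ram_load_postcopy(); otherwise each flag-tagged record (block table,
 * zero page, raw page, compressed page, XBZRLE page, hook, EOS) is
 * handled inline until EOS or an error.
 */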
2399 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2401 int flags = 0, ret = 0;
2402 static uint64_t seq_iter;
2403 int len = 0;
2405 * If the system is running in postcopy mode, page inserts into host memory
2406 * must be atomic.
2408 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2410 seq_iter++;
2412 if (version_id != 4) {
2413 ret = -EINVAL;
2416 /* This RCU critical section can be very long running.
2417 * When RCU reclaims in the code start to become numerous,
2418 * it will be necessary to reduce the granularity of this
2419 * critical section.
2421 rcu_read_lock();
2423 if (postcopy_running) {
2424 ret = ram_load_postcopy(f);
2427 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2428 ram_addr_t addr, total_ram_bytes;
2429 void *host = NULL;
2430 uint8_t ch;
2432 addr = qemu_get_be64(f);
2433 flags = addr & ~TARGET_PAGE_MASK;
2434 addr &= TARGET_PAGE_MASK;
2436 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2437 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2438 host = host_from_stream_offset(f, addr, flags);
2439 if (!host) {
2440 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2441 ret = -EINVAL;
2442 break;
2446 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2447 case RAM_SAVE_FLAG_MEM_SIZE:
2448 /* Synchronize RAM block list */
2449 total_ram_bytes = addr;
2450 while (!ret && total_ram_bytes) {
2451 RAMBlock *block;
2452 char id[256];
2453 ram_addr_t length;
2455 len = qemu_get_byte(f);
2456 qemu_get_buffer(f, (uint8_t *)id, len);
2457 id[len] = 0;
2458 length = qemu_get_be64(f);
2460 block = qemu_ram_block_by_name(id);
2461 if (block) {
2462 if (length != block->used_length) {
2463 Error *local_err = NULL;
2465 ret = qemu_ram_resize(block->offset, length,
2466 &local_err);
2467 if (local_err) {
2468 error_report_err(local_err);
2471 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2472 block->idstr);
2473 } else {
2474 error_report("Unknown ramblock \"%s\", cannot "
2475 "accept migration", id);
2476 ret = -EINVAL;
2479 total_ram_bytes -= length;
2481 break;
2483 case RAM_SAVE_FLAG_COMPRESS:
2484 ch = qemu_get_byte(f);
2485 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2486 break;
2488 case RAM_SAVE_FLAG_PAGE:
2489 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2490 break;
2492 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2493 len = qemu_get_be32(f);
2494 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2495 error_report("Invalid compressed data length: %d", len);
2496 ret = -EINVAL;
2497 break;
2499 qemu_get_buffer(f, compressed_data_buf, len);
2500 decompress_data_with_multi_threads(compressed_data_buf, host, len);
2501 break;
2503 case RAM_SAVE_FLAG_XBZRLE:
2504 if (load_xbzrle(f, addr, host) < 0) {
2505 error_report("Failed to decompress XBZRLE page at "
2506 RAM_ADDR_FMT, addr);
2507 ret = -EINVAL;
2508 break;
2510 break;
2511 case RAM_SAVE_FLAG_EOS:
2512 /* normal exit */
2513 break;
2514 default:
2515 if (flags & RAM_SAVE_FLAG_HOOK) {
2516 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2517 } else {
2518 error_report("Unknown combination of migration flags: %#x",
2519 flags);
2520 ret = -EINVAL;
2523 if (!ret) {
2524 ret = qemu_file_get_error(f);
2528 rcu_read_unlock();
2529 DPRINTF("Completed load of VM with exit code %d seq iteration "
2530 "%" PRIu64 "\n", ret, seq_iter);
2531 return ret;
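/*
 * The RAM migration is registered as a "live" savevm section (version 4)
 * in ram_mig_init(); these handlers implement the setup, iterate,
 * complete (precopy and postcopy), pending and load stages used above.
 */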
2534 static SaveVMHandlers savevm_ram_handlers = {
2535 .save_live_setup = ram_save_setup,
2536 .save_live_iterate = ram_save_iterate,
2537 .save_live_complete_postcopy = ram_save_complete,
2538 .save_live_complete_precopy = ram_save_complete,
2539 .save_live_pending = ram_save_pending,
2540 .load_state = ram_load,
2541 .cleanup = ram_migration_cleanup,
2544 void ram_mig_init(void)
2546 qemu_mutex_init(&XBZRLE.lock);
2547 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);