postcopy: Transmit and compare individual page sizes
[qemu/ar7.git] / migration / ram.c
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
38 #include "migration/migration.h"
39 #include "migration/postcopy-ram.h"
40 #include "exec/address-spaces.h"
41 #include "migration/page_cache.h"
42 #include "qemu/error-report.h"
43 #include "trace.h"
44 #include "exec/ram_addr.h"
45 #include "qemu/rcu_queue.h"
46 #include "migration/colo.h"
48 static int dirty_rate_high_cnt;
50 static uint64_t bitmap_sync_count;
52 /***********************************************************/
53 /* ram save/restore */
55 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
56 #define RAM_SAVE_FLAG_COMPRESS 0x02
57 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
58 #define RAM_SAVE_FLAG_PAGE 0x08
59 #define RAM_SAVE_FLAG_EOS 0x10
60 #define RAM_SAVE_FLAG_CONTINUE 0x20
61 #define RAM_SAVE_FLAG_XBZRLE 0x40
62 /* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
63 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
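/*
 * For illustration: these flags are OR-ed into the low bits of the page
 * offset that save_page_header() writes as a big-endian 64-bit value.
 * E.g. a normal page at (illustrative) offset 0x2000 of the same block as
 * the previously sent page goes out roughly as
 *
 *     qemu_put_be64(f, 0x2000 | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE);
 *
 * followed by TARGET_PAGE_SIZE bytes of page data.
 */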
65 static uint8_t *ZERO_TARGET_PAGE;
67 static inline bool is_zero_range(uint8_t *p, uint64_t size)
69 return buffer_is_zero(p, size);
72 /* This struct contains the XBZRLE cache and a static page
73 used by the compression */
74 static struct {
75 /* buffer used for XBZRLE encoding */
76 uint8_t *encoded_buf;
77 /* buffer for storing page content */
78 uint8_t *current_buf;
79 /* Cache for XBZRLE, Protected by lock. */
80 PageCache *cache;
81 QemuMutex lock;
82 } XBZRLE;
84 /* buffer used for XBZRLE decoding */
85 static uint8_t *xbzrle_decoded_buf;
87 static void XBZRLE_cache_lock(void)
89 if (migrate_use_xbzrle())
90 qemu_mutex_lock(&XBZRLE.lock);
93 static void XBZRLE_cache_unlock(void)
95 if (migrate_use_xbzrle())
96 qemu_mutex_unlock(&XBZRLE.lock);
100 * called from qmp_migrate_set_cache_size in main thread, possibly while
101 * a migration is in progress.
102 * A running migration may be using the cache and might finish during this
103 * call, hence changes to the cache are protected by XBZRLE.lock().
105 int64_t xbzrle_cache_resize(int64_t new_size)
107 PageCache *new_cache;
108 int64_t ret;
110 if (new_size < TARGET_PAGE_SIZE) {
111 return -1;
114 XBZRLE_cache_lock();
116 if (XBZRLE.cache != NULL) {
117 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
118 goto out_new_size;
120 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
121 TARGET_PAGE_SIZE);
122 if (!new_cache) {
123 error_report("Error creating cache");
124 ret = -1;
125 goto out;
128 cache_fini(XBZRLE.cache);
129 XBZRLE.cache = new_cache;
132 out_new_size:
133 ret = pow2floor(new_size);
134 out:
135 XBZRLE_cache_unlock();
136 return ret;
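/*
 * Usage sketch (the size below is only an example): the QMP handler passes
 * the requested cache size in bytes and the result is rounded down to a
 * power of two, e.g.
 *
 *     int64_t actual = xbzrle_cache_resize(80 * 1024 * 1024);
 *
 * leaves actual at 64 MiB worth of bytes (pow2floor), or -1 if the request
 * is smaller than TARGET_PAGE_SIZE or the new cache cannot be allocated.
 */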
139 /* accounting for migration statistics */
140 typedef struct AccountingInfo {
141 uint64_t dup_pages;
142 uint64_t skipped_pages;
143 uint64_t norm_pages;
144 uint64_t iterations;
145 uint64_t xbzrle_bytes;
146 uint64_t xbzrle_pages;
147 uint64_t xbzrle_cache_miss;
148 double xbzrle_cache_miss_rate;
149 uint64_t xbzrle_overflows;
150 } AccountingInfo;
152 static AccountingInfo acct_info;
154 static void acct_clear(void)
156 memset(&acct_info, 0, sizeof(acct_info));
159 uint64_t dup_mig_bytes_transferred(void)
161 return acct_info.dup_pages * TARGET_PAGE_SIZE;
164 uint64_t dup_mig_pages_transferred(void)
166 return acct_info.dup_pages;
169 uint64_t skipped_mig_bytes_transferred(void)
171 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
174 uint64_t skipped_mig_pages_transferred(void)
176 return acct_info.skipped_pages;
179 uint64_t norm_mig_bytes_transferred(void)
181 return acct_info.norm_pages * TARGET_PAGE_SIZE;
184 uint64_t norm_mig_pages_transferred(void)
186 return acct_info.norm_pages;
189 uint64_t xbzrle_mig_bytes_transferred(void)
191 return acct_info.xbzrle_bytes;
194 uint64_t xbzrle_mig_pages_transferred(void)
196 return acct_info.xbzrle_pages;
199 uint64_t xbzrle_mig_pages_cache_miss(void)
201 return acct_info.xbzrle_cache_miss;
204 double xbzrle_mig_cache_miss_rate(void)
206 return acct_info.xbzrle_cache_miss_rate;
209 uint64_t xbzrle_mig_pages_overflow(void)
211 return acct_info.xbzrle_overflows;
214 /* This is the last block that we have visited searching for dirty pages */
216 static RAMBlock *last_seen_block;
217 /* This is the last block from where we have sent data */
218 static RAMBlock *last_sent_block;
219 static ram_addr_t last_offset;
220 static QemuMutex migration_bitmap_mutex;
221 static uint64_t migration_dirty_pages;
222 static uint32_t last_version;
223 static bool ram_bulk_stage;
225 /* used by the search for pages to send */
226 struct PageSearchStatus {
227 /* Current block being searched */
228 RAMBlock *block;
229 /* Current offset to search from */
230 ram_addr_t offset;
231 /* Set once we wrap around */
232 bool complete_round;
234 typedef struct PageSearchStatus PageSearchStatus;
236 static struct BitmapRcu {
237 struct rcu_head rcu;
238 /* Main migration bitmap */
239 unsigned long *bmap;
240 /* bitmap of pages that haven't been sent even once
241 * only maintained and used in postcopy at the moment
242 * where it's used to send the dirtymap at the start
243 * of the postcopy phase
245 unsigned long *unsentmap;
246 } *migration_bitmap_rcu;
248 struct CompressParam {
249 bool done;
250 bool quit;
251 QEMUFile *file;
252 QemuMutex mutex;
253 QemuCond cond;
254 RAMBlock *block;
255 ram_addr_t offset;
257 typedef struct CompressParam CompressParam;
259 struct DecompressParam {
260 bool done;
261 bool quit;
262 QemuMutex mutex;
263 QemuCond cond;
264 void *des;
265 uint8_t *compbuf;
266 int len;
268 typedef struct DecompressParam DecompressParam;
270 static CompressParam *comp_param;
271 static QemuThread *compress_threads;
272 /* comp_done_cond is used to wake up the migration thread when
273 * one of the compression threads has finished the compression.
274 * comp_done_lock is used together with comp_done_cond.
276 static QemuMutex comp_done_lock;
277 static QemuCond comp_done_cond;
278 /* The empty QEMUFileOps will be used by file in CompressParam */
279 static const QEMUFileOps empty_ops = { };
281 static bool compression_switch;
282 static DecompressParam *decomp_param;
283 static QemuThread *decompress_threads;
284 static QemuMutex decomp_done_lock;
285 static QemuCond decomp_done_cond;
287 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
288 ram_addr_t offset);
290 static void *do_data_compress(void *opaque)
292 CompressParam *param = opaque;
293 RAMBlock *block;
294 ram_addr_t offset;
296 qemu_mutex_lock(&param->mutex);
297 while (!param->quit) {
298 if (param->block) {
299 block = param->block;
300 offset = param->offset;
301 param->block = NULL;
302 qemu_mutex_unlock(&param->mutex);
304 do_compress_ram_page(param->file, block, offset);
306 qemu_mutex_lock(&comp_done_lock);
307 param->done = true;
308 qemu_cond_signal(&comp_done_cond);
309 qemu_mutex_unlock(&comp_done_lock);
311 qemu_mutex_lock(&param->mutex);
312 } else {
313 qemu_cond_wait(&param->cond, &param->mutex);
316 qemu_mutex_unlock(&param->mutex);
318 return NULL;
321 static inline void terminate_compression_threads(void)
323 int idx, thread_count;
325 thread_count = migrate_compress_threads();
326 for (idx = 0; idx < thread_count; idx++) {
327 qemu_mutex_lock(&comp_param[idx].mutex);
328 comp_param[idx].quit = true;
329 qemu_cond_signal(&comp_param[idx].cond);
330 qemu_mutex_unlock(&comp_param[idx].mutex);
334 void migrate_compress_threads_join(void)
336 int i, thread_count;
338 if (!migrate_use_compression()) {
339 return;
341 terminate_compression_threads();
342 thread_count = migrate_compress_threads();
343 for (i = 0; i < thread_count; i++) {
344 qemu_thread_join(compress_threads + i);
345 qemu_fclose(comp_param[i].file);
346 qemu_mutex_destroy(&comp_param[i].mutex);
347 qemu_cond_destroy(&comp_param[i].cond);
349 qemu_mutex_destroy(&comp_done_lock);
350 qemu_cond_destroy(&comp_done_cond);
351 g_free(compress_threads);
352 g_free(comp_param);
353 compress_threads = NULL;
354 comp_param = NULL;
357 void migrate_compress_threads_create(void)
359 int i, thread_count;
361 if (!migrate_use_compression()) {
362 return;
364 compression_switch = true;
365 thread_count = migrate_compress_threads();
366 compress_threads = g_new0(QemuThread, thread_count);
367 comp_param = g_new0(CompressParam, thread_count);
368 qemu_cond_init(&comp_done_cond);
369 qemu_mutex_init(&comp_done_lock);
370 for (i = 0; i < thread_count; i++) {
371 /* comp_param[i].file is just used as a dummy buffer to save data,
372 * set its ops to empty.
374 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
375 comp_param[i].done = true;
376 comp_param[i].quit = false;
377 qemu_mutex_init(&comp_param[i].mutex);
378 qemu_cond_init(&comp_param[i].cond);
379 qemu_thread_create(compress_threads + i, "compress",
380 do_data_compress, comp_param + i,
381 QEMU_THREAD_JOINABLE);
386 * save_page_header: Write page header to wire
388 * If this is the 1st block, it also writes the block identification
390 * Returns: Number of bytes written
392 * @f: QEMUFile where to send the data
393 * @block: block that contains the page we want to send
394 * @offset: offset inside the block for the page
395 * (the lower bits of the offset contain flags)
397 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
399 size_t size, len;
401 qemu_put_be64(f, offset);
402 size = 8;
404 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
405 len = strlen(block->idstr);
406 qemu_put_byte(f, len);
407 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
408 size += 1 + len;
410 return size;
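/*
 * Wire layout produced by save_page_header(), for illustration:
 *
 *     8 bytes   be64: offset | flags
 *     1 byte    length of block->idstr   \  only when RAM_SAVE_FLAG_CONTINUE
 *     n bytes   block->idstr             /  is not set in the offset
 *
 * so the return value is either 8 or 9 + strlen(block->idstr).
 */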
413 /* Reduce amount of guest cpu execution to hopefully slow down memory writes.
414 * If guest dirty memory rate is reduced below the rate at which we can
415 * transfer pages to the destination then we should be able to complete
416 * migration. Some workloads dirty memory way too fast and will not effectively
417 * converge, even with auto-converge.
419 static void mig_throttle_guest_down(void)
421 MigrationState *s = migrate_get_current();
422 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
423 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
425 /* We have not started throttling yet. Let's start it. */
426 if (!cpu_throttle_active()) {
427 cpu_throttle_set(pct_initial);
428 } else {
429 /* Throttling already on, just increase the rate */
430 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
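/*
 * Example (the percentages are illustrative; the real values come from the
 * cpu-throttle-initial and cpu-throttle-increment migration parameters):
 * with an initial of 20% and an increment of 10%, successive calls throttle
 * the guest at 20%, 30%, 40%, ... hopefully until the dirty rate drops
 * below the transfer rate.
 */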
434 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
435 * The important thing is that a stale (not-yet-0'd) page be replaced
436 * by the new data.
437 * As a bonus, if the page wasn't in the cache it gets added so that
438 * when a small write is made into the 0'd page it gets XBZRLE sent
440 static void xbzrle_cache_zero_page(ram_addr_t current_addr)
442 if (ram_bulk_stage || !migrate_use_xbzrle()) {
443 return;
446 /* We don't care if this fails to allocate a new cache page
447 * as long as it updated an old one */
448 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
449 bitmap_sync_count);
452 #define ENCODING_FLAG_XBZRLE 0x1
455 * save_xbzrle_page: compress and send current page
457 * Returns: 1 means that we wrote the page
458 * 0 means that page is identical to the one already sent
459 * -1 means that xbzrle would be longer than normal
461 * @f: QEMUFile where to send the data
462 * @current_data:
463 * @current_addr:
464 * @block: block that contains the page we want to send
465 * @offset: offset inside the block for the page
466 * @last_stage: if we are at the completion stage
467 * @bytes_transferred: increase it with the number of transferred bytes
469 static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
470 ram_addr_t current_addr, RAMBlock *block,
471 ram_addr_t offset, bool last_stage,
472 uint64_t *bytes_transferred)
474 int encoded_len = 0, bytes_xbzrle;
475 uint8_t *prev_cached_page;
477 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
478 acct_info.xbzrle_cache_miss++;
479 if (!last_stage) {
480 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
481 bitmap_sync_count) == -1) {
482 return -1;
483 } else {
484 /* update *current_data when the page has been
485 inserted into cache */
486 *current_data = get_cached_data(XBZRLE.cache, current_addr);
489 return -1;
492 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
494 /* save current buffer into memory */
495 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
497 /* XBZRLE encoding (if there is no overflow) */
498 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
499 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
500 TARGET_PAGE_SIZE);
501 if (encoded_len == 0) {
502 trace_save_xbzrle_page_skipping();
503 return 0;
504 } else if (encoded_len == -1) {
505 trace_save_xbzrle_page_overflow();
506 acct_info.xbzrle_overflows++;
507 /* update data in the cache */
508 if (!last_stage) {
509 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
510 *current_data = prev_cached_page;
512 return -1;
515 /* we need to update the data in the cache, in order to get the same data */
516 if (!last_stage) {
517 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
520 /* Send XBZRLE based compressed page */
521 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
522 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
523 qemu_put_be16(f, encoded_len);
524 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
525 bytes_xbzrle += encoded_len + 1 + 2;
526 acct_info.xbzrle_pages++;
527 acct_info.xbzrle_bytes += bytes_xbzrle;
528 *bytes_transferred += bytes_xbzrle;
530 return 1;
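/*
 * Resulting stream record for an XBZRLE page, for illustration:
 *
 *     save_page_header(..., offset | RAM_SAVE_FLAG_XBZRLE)
 *     1 byte    ENCODING_FLAG_XBZRLE
 *     2 bytes   be16: encoded_len
 *     n bytes   XBZRLE-encoded delta against the cached copy of the page
 *
 * which is why bytes_xbzrle is incremented by encoded_len + 1 + 2 above.
 */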
533 /* Called with rcu_read_lock() to protect migration_bitmap
534 * rb: The RAMBlock to search for dirty pages in
535 * start: Start address (typically so we can continue from previous page)
536 * ram_addr_abs: Pointer into which to store the address of the dirty page
537 * within the global ram_addr space
539 * Returns: byte offset within memory region of the start of a dirty page
541 static inline
542 ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
543 ram_addr_t start,
544 ram_addr_t *ram_addr_abs)
546 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
547 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
548 uint64_t rb_size = rb->used_length;
549 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
550 unsigned long *bitmap;
552 unsigned long next;
554 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
555 if (ram_bulk_stage && nr > base) {
556 next = nr + 1;
557 } else {
558 next = find_next_bit(bitmap, size, nr);
561 *ram_addr_abs = next << TARGET_PAGE_BITS;
562 return (next - base) << TARGET_PAGE_BITS;
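/*
 * Indexing example (numbers are illustrative): with 4K target pages, a
 * RAMBlock whose ram_addr_t offset is 0x40000000 has base == 0x40000, so
 * target page N of that block is bit (base + N) in the global migration
 * bitmap; the bit number found is converted back into a byte offset
 * within the block before returning.
 */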
565 static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
567 bool ret;
568 int nr = addr >> TARGET_PAGE_BITS;
569 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
571 ret = test_and_clear_bit(nr, bitmap);
573 if (ret) {
574 migration_dirty_pages--;
576 return ret;
579 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
581 unsigned long *bitmap;
582 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
583 migration_dirty_pages +=
584 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
587 /* Fix me: there are too many global variables used in migration process. */
588 static int64_t start_time;
589 static int64_t bytes_xfer_prev;
590 static int64_t num_dirty_pages_period;
591 static uint64_t xbzrle_cache_miss_prev;
592 static uint64_t iterations_prev;
594 static void migration_bitmap_sync_init(void)
596 start_time = 0;
597 bytes_xfer_prev = 0;
598 num_dirty_pages_period = 0;
599 xbzrle_cache_miss_prev = 0;
600 iterations_prev = 0;
603 /* Returns a summary bitmap of the page sizes of all RAMBlocks;
604 * for VMs with just normal pages this is equivalent to the
605 * host page size. If it's got some huge pages then it's the OR
606 * of all the different page sizes.
608 uint64_t ram_pagesize_summary(void)
610 RAMBlock *block;
611 uint64_t summary = 0;
613 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
614 summary |= block->page_size;
617 return summary;
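/*
 * Example (sizes are illustrative): a guest with normal 4K RAM plus a
 * 2M hugepage-backed RAMBlock yields
 *
 *     summary == 0x1000 | 0x200000 == 0x201000
 *
 * A summary like this is used by the postcopy advise path (outside this
 * file) to check that source and destination see the same set of page
 * sizes before postcopy is allowed to start.
 */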
620 static void migration_bitmap_sync(void)
622 RAMBlock *block;
623 uint64_t num_dirty_pages_init = migration_dirty_pages;
624 MigrationState *s = migrate_get_current();
625 int64_t end_time;
626 int64_t bytes_xfer_now;
628 bitmap_sync_count++;
630 if (!bytes_xfer_prev) {
631 bytes_xfer_prev = ram_bytes_transferred();
634 if (!start_time) {
635 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
638 trace_migration_bitmap_sync_start();
639 memory_global_dirty_log_sync();
641 qemu_mutex_lock(&migration_bitmap_mutex);
642 rcu_read_lock();
643 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
644 migration_bitmap_sync_range(block->offset, block->used_length);
646 rcu_read_unlock();
647 qemu_mutex_unlock(&migration_bitmap_mutex);
649 trace_migration_bitmap_sync_end(migration_dirty_pages
650 - num_dirty_pages_init);
651 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
652 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
654 /* more than 1 second = 1000 milliseconds */
655 if (end_time > start_time + 1000) {
656 if (migrate_auto_converge()) {
657 /* The following detection logic can be refined later. For now:
658 Check to see if the dirtied bytes are 50% more than the approx.
659 amount of bytes that just got transferred since the last time we
660 were in this routine. If that happens twice, start or increase
661 throttling */
662 bytes_xfer_now = ram_bytes_transferred();
664 if (s->dirty_pages_rate &&
665 (num_dirty_pages_period * TARGET_PAGE_SIZE >
666 (bytes_xfer_now - bytes_xfer_prev)/2) &&
667 (dirty_rate_high_cnt++ >= 2)) {
668 trace_migration_throttle();
669 dirty_rate_high_cnt = 0;
670 mig_throttle_guest_down();
672 bytes_xfer_prev = bytes_xfer_now;
675 if (migrate_use_xbzrle()) {
676 if (iterations_prev != acct_info.iterations) {
677 acct_info.xbzrle_cache_miss_rate =
678 (double)(acct_info.xbzrle_cache_miss -
679 xbzrle_cache_miss_prev) /
680 (acct_info.iterations - iterations_prev);
682 iterations_prev = acct_info.iterations;
683 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
685 s->dirty_pages_rate = num_dirty_pages_period * 1000
686 / (end_time - start_time);
687 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
688 start_time = end_time;
689 num_dirty_pages_period = 0;
691 s->dirty_sync_count = bitmap_sync_count;
692 if (migrate_use_events()) {
693 qapi_event_send_migration_pass(bitmap_sync_count, NULL);
698 * save_zero_page: Send the zero page to the stream
700 * Returns: Number of pages written.
702 * @f: QEMUFile where to send the data
703 * @block: block that contains the page we want to send
704 * @offset: offset inside the block for the page
705 * @p: pointer to the page
706 * @bytes_transferred: increase it with the number of transferred bytes
708 static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
709 uint8_t *p, uint64_t *bytes_transferred)
711 int pages = -1;
713 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
714 acct_info.dup_pages++;
715 *bytes_transferred += save_page_header(f, block,
716 offset | RAM_SAVE_FLAG_COMPRESS);
717 qemu_put_byte(f, 0);
718 *bytes_transferred += 1;
719 pages = 1;
722 return pages;
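/*
 * For illustration: a zero page is therefore sent as just the page header
 * with RAM_SAVE_FLAG_COMPRESS set plus a single fill byte of 0, i.e.
 * roughly 9 bytes instead of header + TARGET_PAGE_SIZE for a normal page.
 */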
725 static void ram_release_pages(MigrationState *ms, const char *block_name,
726 uint64_t offset, int pages)
728 if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
729 return;
732 ram_discard_range(NULL, block_name, offset, pages << TARGET_PAGE_BITS);
736 * ram_save_page: Send the given page to the stream
738 * Returns: Number of pages written.
739 * < 0 - error
740 * >=0 - Number of pages written - this might legally be 0
741 * if xbzrle noticed the page was the same.
743 * @ms: The current migration state.
744 * @f: QEMUFile where to send the data
745 * @block: block that contains the page we want to send
746 * @offset: offset inside the block for the page
747 * @last_stage: if we are at the completion stage
748 * @bytes_transferred: increase it with the number of transferred bytes
750 static int ram_save_page(MigrationState *ms, QEMUFile *f, PageSearchStatus *pss,
751 bool last_stage, uint64_t *bytes_transferred)
753 int pages = -1;
754 uint64_t bytes_xmit;
755 ram_addr_t current_addr;
756 uint8_t *p;
757 int ret;
758 bool send_async = true;
759 RAMBlock *block = pss->block;
760 ram_addr_t offset = pss->offset;
762 p = block->host + offset;
764 /* When in doubt, send the page as a normal page */
765 bytes_xmit = 0;
766 ret = ram_control_save_page(f, block->offset,
767 offset, TARGET_PAGE_SIZE, &bytes_xmit);
768 if (bytes_xmit) {
769 *bytes_transferred += bytes_xmit;
770 pages = 1;
773 XBZRLE_cache_lock();
775 current_addr = block->offset + offset;
777 if (block == last_sent_block) {
778 offset |= RAM_SAVE_FLAG_CONTINUE;
780 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
781 if (ret != RAM_SAVE_CONTROL_DELAYED) {
782 if (bytes_xmit > 0) {
783 acct_info.norm_pages++;
784 } else if (bytes_xmit == 0) {
785 acct_info.dup_pages++;
788 } else {
789 pages = save_zero_page(f, block, offset, p, bytes_transferred);
790 if (pages > 0) {
791 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
792 * page would be stale
794 xbzrle_cache_zero_page(current_addr);
795 ram_release_pages(ms, block->idstr, pss->offset, pages);
796 } else if (!ram_bulk_stage &&
797 !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
798 pages = save_xbzrle_page(f, &p, current_addr, block,
799 offset, last_stage, bytes_transferred);
800 if (!last_stage) {
801 /* Can't send this cached data async, since the cache page
802 * might get updated before it gets to the wire
804 send_async = false;
809 /* XBZRLE overflow or normal page */
810 if (pages == -1) {
811 *bytes_transferred += save_page_header(f, block,
812 offset | RAM_SAVE_FLAG_PAGE);
813 if (send_async) {
814 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
815 migrate_release_ram() &
816 migration_in_postcopy(ms));
817 } else {
818 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
820 *bytes_transferred += TARGET_PAGE_SIZE;
821 pages = 1;
822 acct_info.norm_pages++;
825 XBZRLE_cache_unlock();
827 return pages;
830 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
831 ram_addr_t offset)
833 int bytes_sent, blen;
834 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
836 bytes_sent = save_page_header(f, block, offset |
837 RAM_SAVE_FLAG_COMPRESS_PAGE);
838 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
839 migrate_compress_level());
840 if (blen < 0) {
841 bytes_sent = 0;
842 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
843 error_report("compressed data failed!");
844 } else {
845 bytes_sent += blen;
846 ram_release_pages(migrate_get_current(), block->idstr,
847 offset & TARGET_PAGE_MASK, 1);
850 return bytes_sent;
853 static uint64_t bytes_transferred;
855 static void flush_compressed_data(QEMUFile *f)
857 int idx, len, thread_count;
859 if (!migrate_use_compression()) {
860 return;
862 thread_count = migrate_compress_threads();
864 qemu_mutex_lock(&comp_done_lock);
865 for (idx = 0; idx < thread_count; idx++) {
866 while (!comp_param[idx].done) {
867 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
870 qemu_mutex_unlock(&comp_done_lock);
872 for (idx = 0; idx < thread_count; idx++) {
873 qemu_mutex_lock(&comp_param[idx].mutex);
874 if (!comp_param[idx].quit) {
875 len = qemu_put_qemu_file(f, comp_param[idx].file);
876 bytes_transferred += len;
878 qemu_mutex_unlock(&comp_param[idx].mutex);
882 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
883 ram_addr_t offset)
885 param->block = block;
886 param->offset = offset;
889 static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
890 ram_addr_t offset,
891 uint64_t *bytes_transferred)
893 int idx, thread_count, bytes_xmit = -1, pages = -1;
895 thread_count = migrate_compress_threads();
896 qemu_mutex_lock(&comp_done_lock);
897 while (true) {
898 for (idx = 0; idx < thread_count; idx++) {
899 if (comp_param[idx].done) {
900 comp_param[idx].done = false;
901 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
902 qemu_mutex_lock(&comp_param[idx].mutex);
903 set_compress_params(&comp_param[idx], block, offset);
904 qemu_cond_signal(&comp_param[idx].cond);
905 qemu_mutex_unlock(&comp_param[idx].mutex);
906 pages = 1;
907 acct_info.norm_pages++;
908 *bytes_transferred += bytes_xmit;
909 break;
912 if (pages > 0) {
913 break;
914 } else {
915 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
918 qemu_mutex_unlock(&comp_done_lock);
920 return pages;
924 * ram_save_compressed_page: compress the given page and send it to the stream
926 * Returns: Number of pages written.
928 * @ms: The current migration state.
929 * @f: QEMUFile where to send the data
930 * @block: block that contains the page we want to send
931 * @offset: offset inside the block for the page
932 * @last_stage: if we are at the completion stage
933 * @bytes_transferred: increase it with the number of transferred bytes
935 static int ram_save_compressed_page(MigrationState *ms, QEMUFile *f,
936 PageSearchStatus *pss, bool last_stage,
937 uint64_t *bytes_transferred)
939 int pages = -1;
940 uint64_t bytes_xmit = 0;
941 uint8_t *p;
942 int ret, blen;
943 RAMBlock *block = pss->block;
944 ram_addr_t offset = pss->offset;
946 p = block->host + offset;
948 ret = ram_control_save_page(f, block->offset,
949 offset, TARGET_PAGE_SIZE, &bytes_xmit);
950 if (bytes_xmit) {
951 *bytes_transferred += bytes_xmit;
952 pages = 1;
954 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
955 if (ret != RAM_SAVE_CONTROL_DELAYED) {
956 if (bytes_xmit > 0) {
957 acct_info.norm_pages++;
958 } else if (bytes_xmit == 0) {
959 acct_info.dup_pages++;
962 } else {
963 /* When starting the process of a new block, the first page of
964 * the block should be sent out before other pages in the same
965 * block, and all the pages in last block should have been sent
966 * out, keeping this order is important, because the 'cont' flag
967 * is used to avoid resending the block name.
969 if (block != last_sent_block) {
970 flush_compressed_data(f);
971 pages = save_zero_page(f, block, offset, p, bytes_transferred);
972 if (pages == -1) {
973 /* Make sure the first page is sent out before other pages */
974 bytes_xmit = save_page_header(f, block, offset |
975 RAM_SAVE_FLAG_COMPRESS_PAGE);
976 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
977 migrate_compress_level());
978 if (blen > 0) {
979 *bytes_transferred += bytes_xmit + blen;
980 acct_info.norm_pages++;
981 pages = 1;
982 } else {
983 qemu_file_set_error(f, blen);
984 error_report("compressed data failed!");
987 if (pages > 0) {
988 ram_release_pages(ms, block->idstr, pss->offset, pages);
990 } else {
991 offset |= RAM_SAVE_FLAG_CONTINUE;
992 pages = save_zero_page(f, block, offset, p, bytes_transferred);
993 if (pages == -1) {
994 pages = compress_page_with_multi_thread(f, block, offset,
995 bytes_transferred);
996 } else {
997 ram_release_pages(ms, block->idstr, pss->offset, pages);
1002 return pages;
1006 * Find the next dirty page and update any state associated with
1007 * the search process.
1009 * Returns: True if a page is found
1011 * @f: Current migration stream.
1012 * @pss: Data about the state of the current dirty page scan.
1013 * @*again: Set to false if the search has scanned the whole of RAM
1014 * *ram_addr_abs: Pointer into which to store the address of the dirty page
1015 * within the global ram_addr space
1017 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
1018 bool *again, ram_addr_t *ram_addr_abs)
1020 pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
1021 ram_addr_abs);
1022 if (pss->complete_round && pss->block == last_seen_block &&
1023 pss->offset >= last_offset) {
1025 * We've been once around the RAM and haven't found anything.
1026 * Give up.
1028 *again = false;
1029 return false;
1031 if (pss->offset >= pss->block->used_length) {
1032 /* Didn't find anything in this RAM Block */
1033 pss->offset = 0;
1034 pss->block = QLIST_NEXT_RCU(pss->block, next);
1035 if (!pss->block) {
1036 /* Hit the end of the list */
1037 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1038 /* Flag that we've looped */
1039 pss->complete_round = true;
1040 ram_bulk_stage = false;
1041 if (migrate_use_xbzrle()) {
1042 /* If xbzrle is on, stop using the data compression at this
1043 * point. In theory, xbzrle can do better than compression.
1045 flush_compressed_data(f);
1046 compression_switch = false;
1049 /* Didn't find anything this time, but try again on the new block */
1050 *again = true;
1051 return false;
1052 } else {
1053 /* Can go around again, but... */
1054 *again = true;
1055 /* We've found something so probably don't need to */
1056 return true;
1061 * Helper for 'get_queued_page' - gets a page off the queue
1062 * ms: MigrationState in
1063 * *offset: Used to return the offset within the RAMBlock
1064 * ram_addr_abs: global offset in the dirty/sent bitmaps
1066 * Returns: block (or NULL if none available)
1068 static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1069 ram_addr_t *ram_addr_abs)
1071 RAMBlock *block = NULL;
1073 qemu_mutex_lock(&ms->src_page_req_mutex);
1074 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1075 struct MigrationSrcPageRequest *entry =
1076 QSIMPLEQ_FIRST(&ms->src_page_requests);
1077 block = entry->rb;
1078 *offset = entry->offset;
1079 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1080 TARGET_PAGE_MASK;
1082 if (entry->len > TARGET_PAGE_SIZE) {
1083 entry->len -= TARGET_PAGE_SIZE;
1084 entry->offset += TARGET_PAGE_SIZE;
1085 } else {
1086 memory_region_unref(block->mr);
1087 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1088 g_free(entry);
1091 qemu_mutex_unlock(&ms->src_page_req_mutex);
1093 return block;
1097 * Unqueue a page from the queue fed by postcopy page requests; skips pages
1098 * that are already sent (!dirty)
1100 * ms: MigrationState in
1101 * pss: PageSearchStatus structure updated with found block/offset
1102 * ram_addr_abs: global offset in the dirty/sent bitmaps
1104 * Returns: true if a queued page is found
1106 static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1107 ram_addr_t *ram_addr_abs)
1109 RAMBlock *block;
1110 ram_addr_t offset;
1111 bool dirty;
1113 do {
1114 block = unqueue_page(ms, &offset, ram_addr_abs);
1116 * We're sending this page, and since it's postcopy nothing else
1117 * will dirty it, and we must make sure it doesn't get sent again
1118 * even if this queue request was received after the background
1119 * search already sent it.
1121 if (block) {
1122 unsigned long *bitmap;
1123 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1124 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1125 if (!dirty) {
1126 trace_get_queued_page_not_dirty(
1127 block->idstr, (uint64_t)offset,
1128 (uint64_t)*ram_addr_abs,
1129 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1130 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1131 } else {
1132 trace_get_queued_page(block->idstr,
1133 (uint64_t)offset,
1134 (uint64_t)*ram_addr_abs);
1138 } while (block && !dirty);
1140 if (block) {
1142 * As soon as we start servicing pages out of order, we have
1143 * to kill the bulk stage, since the bulk stage assumes
1144 * (in migration_bitmap_find_and_reset_dirty) that every page is
1145 * dirty, and that's no longer true.
1147 ram_bulk_stage = false;
1150 * We want the background search to continue from the queued page
1151 * since the guest is likely to want other pages near to the page
1152 * it just requested.
1154 pss->block = block;
1155 pss->offset = offset;
1158 return !!block;
1162 * flush_page_queue: Flush any remaining pages in the ram request queue
1163 * it should be empty at the end anyway, but in error cases there may be
1164 * some left.
1166 * ms: MigrationState
1168 void flush_page_queue(MigrationState *ms)
1170 struct MigrationSrcPageRequest *mspr, *next_mspr;
1171 /* This queue should generally be empty - but in the case of a failed
1172 * migration it might have some entries left over.
1174 rcu_read_lock();
1175 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1176 memory_region_unref(mspr->rb->mr);
1177 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1178 g_free(mspr);
1180 rcu_read_unlock();
1184 * Queue the pages for transmission, e.g. a request from postcopy destination
1185 * ms: MigrationStatus in which the queue is held
1186 * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1187 * start: Offset from the start of the RAMBlock
1188 * len: Length (in bytes) to send
1189 * Return: 0 on success
1191 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1192 ram_addr_t start, ram_addr_t len)
1194 RAMBlock *ramblock;
1196 ms->postcopy_requests++;
1197 rcu_read_lock();
1198 if (!rbname) {
1199 /* Reuse last RAMBlock */
1200 ramblock = ms->last_req_rb;
1202 if (!ramblock) {
1204 * Shouldn't happen, we can't reuse the last RAMBlock if
1205 * it's the 1st request.
1207 error_report("ram_save_queue_pages no previous block");
1208 goto err;
1210 } else {
1211 ramblock = qemu_ram_block_by_name(rbname);
1213 if (!ramblock) {
1214 /* We shouldn't be asked for a non-existent RAMBlock */
1215 error_report("ram_save_queue_pages no block '%s'", rbname);
1216 goto err;
1218 ms->last_req_rb = ramblock;
1220 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1221 if (start+len > ramblock->used_length) {
1222 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1223 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1224 __func__, start, len, ramblock->used_length);
1225 goto err;
1228 struct MigrationSrcPageRequest *new_entry =
1229 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1230 new_entry->rb = ramblock;
1231 new_entry->offset = start;
1232 new_entry->len = len;
1234 memory_region_ref(ramblock->mr);
1235 qemu_mutex_lock(&ms->src_page_req_mutex);
1236 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1237 qemu_mutex_unlock(&ms->src_page_req_mutex);
1238 rcu_read_unlock();
1240 return 0;
1242 err:
1243 rcu_read_unlock();
1244 return -1;
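/*
 * Usage sketch (block name and arguments are illustrative): when the
 * destination faults on a not-yet-received page during postcopy, the
 * request that comes back over the return path ends up here as something
 * like
 *
 *     ram_save_queue_pages(ms, "pc.ram", aligned_fault_offset, page_size);
 *
 * and the queued entry is then picked up by get_queued_page() ahead of the
 * normal background scan.
 */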
1248 * ram_save_target_page: Save one target page
1251 * @f: QEMUFile where to send the data
1252 * @block: pointer to block that contains the page we want to send
1253 * @offset: offset inside the block for the page;
1254 * @last_stage: if we are at the completion stage
1255 * @bytes_transferred: increase it with the number of transferred bytes
1256 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1258 * Returns: Number of pages written.
1260 static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
1261 PageSearchStatus *pss,
1262 bool last_stage,
1263 uint64_t *bytes_transferred,
1264 ram_addr_t dirty_ram_abs)
1266 int res = 0;
1268 /* Check if the page is dirty and, if it is, send it */
1269 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1270 unsigned long *unsentmap;
1271 if (compression_switch && migrate_use_compression()) {
1272 res = ram_save_compressed_page(ms, f, pss,
1273 last_stage,
1274 bytes_transferred);
1275 } else {
1276 res = ram_save_page(ms, f, pss, last_stage,
1277 bytes_transferred);
1280 if (res < 0) {
1281 return res;
1283 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1284 if (unsentmap) {
1285 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1287 /* Only update last_sent_block if a block was actually sent; xbzrle
1288 * might have decided the page was identical so didn't bother writing
1289 * to the stream.
1291 if (res > 0) {
1292 last_sent_block = pss->block;
1296 return res;
1300 * ram_save_host_page: Starting at *offset send pages up to the end
1301 * of the current host page. It's valid for the initial
1302 * offset to point into the middle of a host page
1303 * in which case the remainder of the hostpage is sent.
1304 * Only dirty target pages are sent.
1306 * Returns: Number of pages written.
1308 * @f: QEMUFile where to send the data
1309 * @block: pointer to block that contains the page we want to send
1310 * @offset: offset inside the block for the page; updated to last target page
1311 * sent
1312 * @last_stage: if we are at the completion stage
1313 * @bytes_transferred: increase it with the number of transferred bytes
1314 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1316 static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
1317 PageSearchStatus *pss,
1318 bool last_stage,
1319 uint64_t *bytes_transferred,
1320 ram_addr_t dirty_ram_abs)
1322 int tmppages, pages = 0;
1323 do {
1324 tmppages = ram_save_target_page(ms, f, pss, last_stage,
1325 bytes_transferred, dirty_ram_abs);
1326 if (tmppages < 0) {
1327 return tmppages;
1330 pages += tmppages;
1331 pss->offset += TARGET_PAGE_SIZE;
1332 dirty_ram_abs += TARGET_PAGE_SIZE;
1333 } while (pss->offset & (qemu_host_page_size - 1));
1335 /* The offset we leave with is the last one we looked at */
1336 pss->offset -= TARGET_PAGE_SIZE;
1337 return pages;
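/*
 * Example (page sizes are illustrative): on a host with 64K pages and a
 * 4K target page size, one call walks the 16 target pages that make up
 * the host page and sends the dirty ones, so that the destination never
 * ends up with a partially sent host page (which matters for postcopy,
 * where pages are placed one host page at a time).
 */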
1341 * ram_find_and_save_block: Finds a dirty page and sends it to f
1343 * Called within an RCU critical section.
1345 * Returns: The number of pages written
1346 * 0 means no dirty pages
1348 * @f: QEMUFile where to send the data
1349 * @last_stage: if we are at the completion stage
1350 * @bytes_transferred: increase it with the number of transferred bytes
1352 * On systems where host-page-size > target-page-size it will send all the
1353 * pages in a host page that are dirty.
1356 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1357 uint64_t *bytes_transferred)
1359 PageSearchStatus pss;
1360 MigrationState *ms = migrate_get_current();
1361 int pages = 0;
1362 bool again, found;
1363 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1364 ram_addr_t space */
1366 /* No dirty page as there is zero RAM */
1367 if (!ram_bytes_total()) {
1368 return pages;
1371 pss.block = last_seen_block;
1372 pss.offset = last_offset;
1373 pss.complete_round = false;
1375 if (!pss.block) {
1376 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1379 do {
1380 again = true;
1381 found = get_queued_page(ms, &pss, &dirty_ram_abs);
1383 if (!found) {
1384 /* priority queue empty, so just search for something dirty */
1385 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1388 if (found) {
1389 pages = ram_save_host_page(ms, f, &pss,
1390 last_stage, bytes_transferred,
1391 dirty_ram_abs);
1393 } while (!pages && again);
1395 last_seen_block = pss.block;
1396 last_offset = pss.offset;
1398 return pages;
1401 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1403 uint64_t pages = size / TARGET_PAGE_SIZE;
1404 if (zero) {
1405 acct_info.dup_pages += pages;
1406 } else {
1407 acct_info.norm_pages += pages;
1408 bytes_transferred += size;
1409 qemu_update_position(f, size);
1413 static ram_addr_t ram_save_remaining(void)
1415 return migration_dirty_pages;
1418 uint64_t ram_bytes_remaining(void)
1420 return ram_save_remaining() * TARGET_PAGE_SIZE;
1423 uint64_t ram_bytes_transferred(void)
1425 return bytes_transferred;
1428 uint64_t ram_bytes_total(void)
1430 RAMBlock *block;
1431 uint64_t total = 0;
1433 rcu_read_lock();
1434 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1435 total += block->used_length;
1436 rcu_read_unlock();
1437 return total;
1440 void free_xbzrle_decoded_buf(void)
1442 g_free(xbzrle_decoded_buf);
1443 xbzrle_decoded_buf = NULL;
1446 static void migration_bitmap_free(struct BitmapRcu *bmap)
1448 g_free(bmap->bmap);
1449 g_free(bmap->unsentmap);
1450 g_free(bmap);
1453 static void ram_migration_cleanup(void *opaque)
1455 /* The caller holds the iothread lock or is in a bh, so there is
1456 * no write race against this migration_bitmap
1458 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1459 atomic_rcu_set(&migration_bitmap_rcu, NULL);
1460 if (bitmap) {
1461 memory_global_dirty_log_stop();
1462 call_rcu(bitmap, migration_bitmap_free, rcu);
1465 XBZRLE_cache_lock();
1466 if (XBZRLE.cache) {
1467 cache_fini(XBZRLE.cache);
1468 g_free(XBZRLE.encoded_buf);
1469 g_free(XBZRLE.current_buf);
1470 g_free(ZERO_TARGET_PAGE);
1471 XBZRLE.cache = NULL;
1472 XBZRLE.encoded_buf = NULL;
1473 XBZRLE.current_buf = NULL;
1475 XBZRLE_cache_unlock();
1478 static void reset_ram_globals(void)
1480 last_seen_block = NULL;
1481 last_sent_block = NULL;
1482 last_offset = 0;
1483 last_version = ram_list.version;
1484 ram_bulk_stage = true;
1487 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1489 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1491 /* called in qemu main thread, so there is
1492 * no writing race against this migration_bitmap
1494 if (migration_bitmap_rcu) {
1495 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1496 bitmap = g_new(struct BitmapRcu, 1);
1497 bitmap->bmap = bitmap_new(new);
1499 /* prevent migration_bitmap bits from being set
1500 * by migration_bitmap_sync_range() at the same time.
1501 * It is safe for migration if migration_bitmap bits are cleared
1502 * at the same time.
1504 qemu_mutex_lock(&migration_bitmap_mutex);
1505 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1506 bitmap_set(bitmap->bmap, old, new - old);
1508 /* We don't have a way to safely extend the sentmap
1509 * with RCU; so mark it as missing, entry to postcopy
1510 * will fail.
1512 bitmap->unsentmap = NULL;
1514 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1515 qemu_mutex_unlock(&migration_bitmap_mutex);
1516 migration_dirty_pages += new - old;
1517 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1522 * 'expected' is the value you expect the bitmap mostly to be full
1523 * of; it won't bother printing lines that are all this value.
1524 * If 'todump' is null the migration bitmap is dumped.
1526 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1528 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1530 int64_t cur;
1531 int64_t linelen = 128;
1532 char linebuf[129];
1534 if (!todump) {
1535 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1538 for (cur = 0; cur < ram_pages; cur += linelen) {
1539 int64_t curb;
1540 bool found = false;
1542 * Last line; catch the case where the line length
1543 * is longer than remaining ram
1545 if (cur + linelen > ram_pages) {
1546 linelen = ram_pages - cur;
1548 for (curb = 0; curb < linelen; curb++) {
1549 bool thisbit = test_bit(cur + curb, todump);
1550 linebuf[curb] = thisbit ? '1' : '.';
1551 found = found || (thisbit != expected);
1553 if (found) {
1554 linebuf[curb] = '\0';
1555 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1560 /* **** functions for postcopy ***** */
1562 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1564 struct RAMBlock *block;
1565 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1567 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1568 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1569 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1570 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1572 while (run_start < range) {
1573 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1574 ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1575 (run_end - run_start) << TARGET_PAGE_BITS);
1576 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1582 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1583 * Note: At this point the 'unsentmap' is the processed bitmap combined
1584 * with the dirtymap; so a '1' means it's either dirty or unsent.
1585 * start,length: Indexes into the bitmap for the first bit
1586 * representing the named block and length in target-pages
1588 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1589 PostcopyDiscardState *pds,
1590 unsigned long start,
1591 unsigned long length)
1593 unsigned long end = start + length; /* one after the end */
1594 unsigned long current;
1595 unsigned long *unsentmap;
1597 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1598 for (current = start; current < end; ) {
1599 unsigned long one = find_next_bit(unsentmap, end, current);
1601 if (one <= end) {
1602 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1603 unsigned long discard_length;
1605 if (zero >= end) {
1606 discard_length = end - one;
1607 } else {
1608 discard_length = zero - one;
1610 if (discard_length) {
1611 postcopy_discard_send_range(ms, pds, one, discard_length);
1613 current = one + discard_length;
1614 } else {
1615 current = one;
1619 return 0;
1623 * Utility for the outgoing postcopy code.
1624 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1625 * passing it bitmap indexes and name.
1626 * Returns: 0 on success
1627 * (qemu_ram_foreach_block ends up passing unscaled lengths
1628 * which would mean postcopy code would have to deal with target page)
1630 static int postcopy_each_ram_send_discard(MigrationState *ms)
1632 struct RAMBlock *block;
1633 int ret;
1635 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1636 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1637 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1638 first,
1639 block->idstr);
1642 * Postcopy sends chunks of bitmap over the wire, but it
1643 * just needs indexes at this point, which avoids it having
1644 * target-page-specific code.
1646 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1647 block->used_length >> TARGET_PAGE_BITS);
1648 postcopy_discard_send_finish(ms, pds);
1649 if (ret) {
1650 return ret;
1654 return 0;
1658 * Helper for postcopy_chunk_hostpages; it's called twice to clean up
1659 * the two bitmaps, which are similar, but one is inverted.
1661 * We search for runs of target-pages that don't start or end on a
1662 * host page boundary;
1663 * unsent_pass=true: Cleans up partially unsent host pages by searching
1664 * the unsentmap
1665 * unsent_pass=false: Cleans up partially dirty host pages by searching
1666 * the main migration bitmap
1669 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1670 RAMBlock *block,
1671 PostcopyDiscardState *pds)
1673 unsigned long *bitmap;
1674 unsigned long *unsentmap;
1675 unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
1676 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1677 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1678 unsigned long last = first + (len - 1);
1679 unsigned long run_start;
1681 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1682 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1684 if (unsent_pass) {
1685 /* Find a sent page */
1686 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1687 } else {
1688 /* Find a dirty page */
1689 run_start = find_next_bit(bitmap, last + 1, first);
1692 while (run_start <= last) {
1693 bool do_fixup = false;
1694 unsigned long fixup_start_addr;
1695 unsigned long host_offset;
1698 * If the start of this run of pages is in the middle of a host
1699 * page, then we need to fixup this host page.
1701 host_offset = run_start % host_ratio;
1702 if (host_offset) {
1703 do_fixup = true;
1704 run_start -= host_offset;
1705 fixup_start_addr = run_start;
1706 /* For the next pass */
1707 run_start = run_start + host_ratio;
1708 } else {
1709 /* Find the end of this run */
1710 unsigned long run_end;
1711 if (unsent_pass) {
1712 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1713 } else {
1714 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1717 * If the end isn't at the start of a host page, then the
1718 * run doesn't finish at the end of a host page
1719 * and we need to discard.
1721 host_offset = run_end % host_ratio;
1722 if (host_offset) {
1723 do_fixup = true;
1724 fixup_start_addr = run_end - host_offset;
1726 * This host page has gone, the next loop iteration starts
1727 * from after the fixup
1729 run_start = fixup_start_addr + host_ratio;
1730 } else {
1732 * No discards on this iteration, next loop starts from
1733 * next sent/dirty page
1735 run_start = run_end + 1;
1739 if (do_fixup) {
1740 unsigned long page;
1742 /* Tell the destination to discard this page */
1743 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1744 /* For the unsent_pass we:
1745 * discard partially sent pages
1746 * For the !unsent_pass (dirty) we:
1747 * discard partially dirty pages that were sent
1748 * (any partially sent pages were already discarded
1749 * by the previous unsent_pass)
1751 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1752 host_ratio);
1755 /* Clean up the bitmap */
1756 for (page = fixup_start_addr;
1757 page < fixup_start_addr + host_ratio; page++) {
1758 /* All pages in this host page are now not sent */
1759 set_bit(page, unsentmap);
1762 * Remark them as dirty, updating the count for any pages
1763 * that weren't previously dirty.
1765 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1769 if (unsent_pass) {
1770 /* Find the next sent page for the next iteration */
1771 run_start = find_next_zero_bit(unsentmap, last + 1,
1772 run_start);
1773 } else {
1774 /* Find the next dirty page for the next iteration */
1775 run_start = find_next_bit(bitmap, last + 1, run_start);
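/*
 * Worked example (numbers are illustrative): with 64K host pages and 4K
 * target pages, host_ratio is 16.  If a dirty run starts at target page
 * 18, host_offset is 2, so the fixup rewinds run_start to 16 and the
 * whole host page covering target pages 16..31 is discarded and marked
 * dirty/unsent again.
 */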
1781 * Utility for the outgoing postcopy code.
1783 * Discard any partially sent host-page size chunks, mark any partially
1784 * dirty host-page size chunks as all dirty.
1786 * Returns: 0 on success
1788 static int postcopy_chunk_hostpages(MigrationState *ms)
1790 struct RAMBlock *block;
1792 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1793 /* Easy case - TPS==HPS - nothing to be done */
1794 return 0;
1797 /* Easiest way to make sure we don't resume in the middle of a host-page */
1798 last_seen_block = NULL;
1799 last_sent_block = NULL;
1800 last_offset = 0;
1802 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1803 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1805 PostcopyDiscardState *pds =
1806 postcopy_discard_send_init(ms, first, block->idstr);
1808 /* First pass: Discard all partially sent host pages */
1809 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1811 * Second pass: Ensure that all partially dirty host pages are made
1812 * fully dirty.
1814 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1816 postcopy_discard_send_finish(ms, pds);
1817 } /* ram_list loop */
1819 return 0;
1823 * Transmit the set of pages to be discarded after precopy to the target;
1824 * these are pages that:
1825 * a) Have been previously transmitted but are now dirty again
1826 * b) Pages that have never been transmitted, this ensures that
1827 * any pages on the destination that have been mapped by background
1828 * tasks get discarded (transparent huge pages is the specific concern)
1829 * Hopefully this is pretty sparse
1831 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1833 int ret;
1834 unsigned long *bitmap, *unsentmap;
1836 rcu_read_lock();
1838 /* This should be our last sync, the src is now paused */
1839 migration_bitmap_sync();
1841 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1842 if (!unsentmap) {
1843 /* We don't have a safe way to resize the sentmap, so
1844 * if the bitmap was resized it will be NULL at this
1845 * point.
1847 error_report("migration ram resized during precopy phase");
1848 rcu_read_unlock();
1849 return -EINVAL;
1852 /* Deal with TPS != HPS */
1853 ret = postcopy_chunk_hostpages(ms);
1854 if (ret) {
1855 rcu_read_unlock();
1856 return ret;
1860 * Update the unsentmap to be unsentmap = unsentmap | dirty
1862 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1863 bitmap_or(unsentmap, unsentmap, bitmap,
1864 last_ram_offset() >> TARGET_PAGE_BITS);
1867 trace_ram_postcopy_send_discard_bitmap();
1868 #ifdef DEBUG_POSTCOPY
1869 ram_debug_dump_bitmap(unsentmap, true);
1870 #endif
1872 ret = postcopy_each_ram_send_discard(ms);
1873 rcu_read_unlock();
1875 return ret;
1879 * At the start of the postcopy phase of migration, any now-dirty
1880 * precopied pages are discarded.
1882 * start, length describe a byte address range within the RAMBlock
1884 * Returns 0 on success.
1886 int ram_discard_range(MigrationIncomingState *mis,
1887 const char *block_name,
1888 uint64_t start, size_t length)
1890 int ret = -1;
1892 rcu_read_lock();
1893 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1895 if (!rb) {
1896 error_report("ram_discard_range: Failed to find block '%s'",
1897 block_name);
1898 goto err;
1901 uint8_t *host_startaddr = rb->host + start;
1903 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1904 error_report("ram_discard_range: Unaligned start address: %p",
1905 host_startaddr);
1906 goto err;
1909 if ((start + length) <= rb->used_length) {
1910 uint8_t *host_endaddr = host_startaddr + length;
1911 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1912 error_report("ram_discard_range: Unaligned end address: %p",
1913 host_endaddr);
1914 goto err;
1916 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1917 } else {
1918 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
1919 "/%zx/" RAM_ADDR_FMT")",
1920 block_name, start, length, rb->used_length);
1923 err:
1924 rcu_read_unlock();
1926 return ret;
1929 static int ram_save_init_globals(void)
1931 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1933 dirty_rate_high_cnt = 0;
1934 bitmap_sync_count = 0;
1935 migration_bitmap_sync_init();
1936 qemu_mutex_init(&migration_bitmap_mutex);
1938 if (migrate_use_xbzrle()) {
1939 XBZRLE_cache_lock();
1940 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1941 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1942 TARGET_PAGE_SIZE,
1943 TARGET_PAGE_SIZE);
1944 if (!XBZRLE.cache) {
1945 XBZRLE_cache_unlock();
1946 error_report("Error creating cache");
1947 return -1;
1949 XBZRLE_cache_unlock();
1951 /* We prefer not to abort if there is no memory */
1952 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1953 if (!XBZRLE.encoded_buf) {
1954 error_report("Error allocating encoded_buf");
1955 return -1;
1958 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1959 if (!XBZRLE.current_buf) {
1960 error_report("Error allocating current_buf");
1961 g_free(XBZRLE.encoded_buf);
1962 XBZRLE.encoded_buf = NULL;
1963 return -1;
1966 acct_clear();
1969 /* For memory_global_dirty_log_start below. */
1970 qemu_mutex_lock_iothread();
1972 qemu_mutex_lock_ramlist();
1973 rcu_read_lock();
1974 bytes_transferred = 0;
1975 reset_ram_globals();
1977 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
1978 /* Skip setting bitmap if there is no RAM */
1979 if (ram_bytes_total()) {
1980 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1981 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1982 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1984 if (migrate_postcopy_ram()) {
1985 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1986 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1991 * Count the total number of pages used by ram blocks not including any
1992 * gaps due to alignment or unplugs.
1994 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1996 memory_global_dirty_log_start();
1997 migration_bitmap_sync();
1998 qemu_mutex_unlock_ramlist();
1999 qemu_mutex_unlock_iothread();
2000 rcu_read_unlock();
2002 return 0;
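/*
 * Worked example, not from the original code: sizing of the bitmaps that
 * ram_save_init_globals() allocates above.  The numbers assume a guest with
 * 4 GiB of RAM, 4 KiB target pages and no alignment gaps, so that
 * last_ram_offset() equals ram_bytes_total().
 */
#if 0
static void example_bitmap_sizing(void)
{
    uint64_t ram_bytes = 4ULL * 1024 * 1024 * 1024;  /* 4 GiB of guest RAM  */
    uint64_t page_size = 4096;                       /* TARGET_PAGE_SIZE    */
    uint64_t pages = ram_bytes / page_size;          /* 1048576 pages       */
    uint64_t bitmap_bytes = pages / 8;               /* 131072 B == 128 KiB */

    /* The postcopy unsentmap, when allocated, is the same size again. */
    printf("%" PRIu64 " pages -> %" PRIu64 " KiB of dirty bitmap\n",
           pages, bitmap_bytes / 1024);
}
#endif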
2005 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
2006 * long-running RCU critical section.  When RCU reclaims in the code
2007 * start to become numerous it will be necessary to reduce the
2008 * granularity of these critical sections.
2011 static int ram_save_setup(QEMUFile *f, void *opaque)
2013 RAMBlock *block;
2015 /* migration has already set up the bitmap, reuse it. */
2016 if (!migration_in_colo_state()) {
2017 if (ram_save_init_globals() < 0) {
2018 return -1;
2022 rcu_read_lock();
2024 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2026 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2027 qemu_put_byte(f, strlen(block->idstr));
2028 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2029 qemu_put_be64(f, block->used_length);
2030 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2031 qemu_put_be64(f, block->page_size);
2035 rcu_read_unlock();
2037 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2038 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2040 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2042 return 0;
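/*
 * Stream layout produced by ram_save_setup() above, as parsed back by the
 * RAM_SAVE_FLAG_MEM_SIZE branch of ram_load() further down:
 *
 *   be64   ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *   for each RAMBlock:
 *     u8     strlen(idstr)
 *     bytes  idstr (not NUL terminated)
 *     be64   used_length
 *     be64   page_size   (only when postcopy is enabled and the block's
 *                         page size differs from qemu_host_page_size)
 *   be64   RAM_SAVE_FLAG_EOS
 */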
2045 static int ram_save_iterate(QEMUFile *f, void *opaque)
2047 int ret;
2048 int i;
2049 int64_t t0;
2050 int done = 0;
2052 rcu_read_lock();
2053 if (ram_list.version != last_version) {
2054 reset_ram_globals();
2057 /* Read version before ram_list.blocks */
2058 smp_rmb();
2060 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2062 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2063 i = 0;
2064 while ((ret = qemu_file_rate_limit(f)) == 0) {
2065 int pages;
2067 pages = ram_find_and_save_block(f, false, &bytes_transferred);
2068 /* no more pages to send */
2069 if (pages == 0) {
2070 done = 1;
2071 break;
2073 acct_info.iterations++;
2075 /* we want to check on the first iteration, just in case it was the
2076    first time and we had to sync the dirty bitmap.
2077    qemu_clock_get_ns() is a bit expensive, so we only check once every
2078    few iterations
2080 if ((i & 63) == 0) {
2081 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2082 if (t1 > MAX_WAIT) {
2083 trace_ram_save_iterate_big_wait(t1, i);
2084 break;
2087 i++;
2089 flush_compressed_data(f);
2090 rcu_read_unlock();
2093 * Must occur before EOS (or any QEMUFile operation)
2094 * because of the RDMA protocol.
2096 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2098 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2099 bytes_transferred += 8;
2101 ret = qemu_file_get_error(f);
2102 if (ret < 0) {
2103 return ret;
2106 return done;
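/*
 * Minimal sketch (kept under #if 0) of the bounded-loop pattern used by
 * ram_save_iterate() above: reading the clock is comparatively expensive,
 * so the elapsed time is only sampled once every 64 iterations via
 * (i & 63) == 0.  MAX_WAIT_MS, now_ns() and do_one_unit_of_work() are
 * hypothetical stand-ins, not QEMU interfaces.
 */
#if 0
#define MAX_WAIT_MS 50

static void example_bounded_loop(int64_t (*now_ns)(void),
                                 bool (*do_one_unit_of_work)(void))
{
    int64_t t0 = now_ns();
    int i = 0;

    while (do_one_unit_of_work()) {
        if ((i & 63) == 0) {
            int64_t elapsed_ms = (now_ns() - t0) / 1000000;
            if (elapsed_ms > MAX_WAIT_MS) {
                break;    /* hand control back to the caller */
            }
        }
        i++;
    }
}
#endif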
2109 /* Called with iothread lock */
2110 static int ram_save_complete(QEMUFile *f, void *opaque)
2112 rcu_read_lock();
2114 if (!migration_in_postcopy(migrate_get_current())) {
2115 migration_bitmap_sync();
2118 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2120 /* try transferring iterative blocks of memory */
2122 /* flush all remaining blocks regardless of rate limiting */
2123 while (true) {
2124 int pages;
2126 pages = ram_find_and_save_block(f, !migration_in_colo_state(),
2127 &bytes_transferred);
2128 /* no more blocks to send */
2129 if (pages == 0) {
2130 break;
2134 flush_compressed_data(f);
2135 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2137 rcu_read_unlock();
2139 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2141 return 0;
2144 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2145 uint64_t *non_postcopiable_pending,
2146 uint64_t *postcopiable_pending)
2148 uint64_t remaining_size;
2150 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2152 if (!migration_in_postcopy(migrate_get_current()) &&
2153 remaining_size < max_size) {
2154 qemu_mutex_lock_iothread();
2155 rcu_read_lock();
2156 migration_bitmap_sync();
2157 rcu_read_unlock();
2158 qemu_mutex_unlock_iothread();
2159 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2162 /* We can do postcopy, and all the data is postcopiable */
2163 *postcopiable_pending += remaining_size;
2166 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2168 unsigned int xh_len;
2169 int xh_flags;
2170 uint8_t *loaded_data;
2172 if (!xbzrle_decoded_buf) {
2173 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2175 loaded_data = xbzrle_decoded_buf;
2177 /* extract XBZRLE header */
2178 xh_flags = qemu_get_byte(f);
2179 xh_len = qemu_get_be16(f);
2181 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2182 error_report("Failed to load XBZRLE page - wrong compression!");
2183 return -1;
2186 if (xh_len > TARGET_PAGE_SIZE) {
2187 error_report("Failed to load XBZRLE page - len overflow!");
2188 return -1;
2190 /* load data and decode */
2191 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2193 /* decode XBZRLE */
2194 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2195 TARGET_PAGE_SIZE) == -1) {
2196 error_report("Failed to load XBZRLE page - decode error!");
2197 return -1;
2200 return 0;
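/*
 * XBZRLE page record as consumed by load_xbzrle() above:
 *
 *   u8    ENCODING_FLAG_XBZRLE
 *   be16  encoded length (rejected if it exceeds TARGET_PAGE_SIZE)
 *   bytes encoded delta, applied by xbzrle_decode_buffer() against the
 *         current contents of the target page at 'host'
 */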
2203 /* Must be called from within an RCU critical section.
2204 * Returns a pointer from within the RCU-protected ram_list.
2207 * Read a RAMBlock ID from the stream f.
2209 * f: Stream to read from
2210 * flags: Page flags (mostly to see if it's a continuation of the previous block)
2212 static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2213 int flags)
2215 static RAMBlock *block = NULL;
2216 char id[256];
2217 uint8_t len;
2219 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2220 if (!block) {
2221 error_report("Ack, bad migration stream!");
2222 return NULL;
2224 return block;
2227 len = qemu_get_byte(f);
2228 qemu_get_buffer(f, (uint8_t *)id, len);
2229 id[len] = 0;
2231 block = qemu_ram_block_by_name(id);
2232 if (!block) {
2233 error_report("Can't find block %s", id);
2234 return NULL;
2237 return block;
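/*
 * RAM_SAVE_FLAG_CONTINUE lets the source omit the RAMBlock name whenever a
 * page belongs to the same block as the previous one, so a run of pages
 * typically looks like this (block name purely illustrative):
 *
 *   addr | RAM_SAVE_FLAG_PAGE                          u8 len, "pc.ram", data
 *   addr | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE                   data
 *   addr | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE                   data
 *
 * The static 'block' above carries that state between calls, which is why a
 * CONTINUE flag at the very start of a stream is reported as a bad stream.
 */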
2240 static inline void *host_from_ram_block_offset(RAMBlock *block,
2241 ram_addr_t offset)
2243 if (!offset_in_ramblock(block, offset)) {
2244 return NULL;
2247 return block->host + offset;
2251 * If a page (or a whole RDMA chunk) has been
2252 * determined to be zero, then zap it.
2254 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2256 if (ch != 0 || !is_zero_range(host, size)) {
2257 memset(host, ch, size);
2261 static void *do_data_decompress(void *opaque)
2263 DecompressParam *param = opaque;
2264 unsigned long pagesize;
2265 uint8_t *des;
2266 int len;
2268 qemu_mutex_lock(&param->mutex);
2269 while (!param->quit) {
2270 if (param->des) {
2271 des = param->des;
2272 len = param->len;
2273 param->des = 0;
2274 qemu_mutex_unlock(&param->mutex);
2276 pagesize = TARGET_PAGE_SIZE;
2277 /* uncompress() can fail in some cases, especially when the
2278 * page is dirtied while the compression is in progress; that is
2279 * not a problem because the dirty page will be retransferred
2280 * and uncompress() won't break the data in other pages.
2282 uncompress((Bytef *)des, &pagesize,
2283 (const Bytef *)param->compbuf, len);
2285 qemu_mutex_lock(&decomp_done_lock);
2286 param->done = true;
2287 qemu_cond_signal(&decomp_done_cond);
2288 qemu_mutex_unlock(&decomp_done_lock);
2290 qemu_mutex_lock(&param->mutex);
2291 } else {
2292 qemu_cond_wait(&param->cond, &param->mutex);
2295 qemu_mutex_unlock(&param->mutex);
2297 return NULL;
2300 static void wait_for_decompress_done(void)
2302 int idx, thread_count;
2304 if (!migrate_use_compression()) {
2305 return;
2308 thread_count = migrate_decompress_threads();
2309 qemu_mutex_lock(&decomp_done_lock);
2310 for (idx = 0; idx < thread_count; idx++) {
2311 while (!decomp_param[idx].done) {
2312 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2315 qemu_mutex_unlock(&decomp_done_lock);
2318 void migrate_decompress_threads_create(void)
2320 int i, thread_count;
2322 thread_count = migrate_decompress_threads();
2323 decompress_threads = g_new0(QemuThread, thread_count);
2324 decomp_param = g_new0(DecompressParam, thread_count);
2325 qemu_mutex_init(&decomp_done_lock);
2326 qemu_cond_init(&decomp_done_cond);
2327 for (i = 0; i < thread_count; i++) {
2328 qemu_mutex_init(&decomp_param[i].mutex);
2329 qemu_cond_init(&decomp_param[i].cond);
2330 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2331 decomp_param[i].done = true;
2332 decomp_param[i].quit = false;
2333 qemu_thread_create(decompress_threads + i, "decompress",
2334 do_data_decompress, decomp_param + i,
2335 QEMU_THREAD_JOINABLE);
2339 void migrate_decompress_threads_join(void)
2341 int i, thread_count;
2343 thread_count = migrate_decompress_threads();
2344 for (i = 0; i < thread_count; i++) {
2345 qemu_mutex_lock(&decomp_param[i].mutex);
2346 decomp_param[i].quit = true;
2347 qemu_cond_signal(&decomp_param[i].cond);
2348 qemu_mutex_unlock(&decomp_param[i].mutex);
2350 for (i = 0; i < thread_count; i++) {
2351 qemu_thread_join(decompress_threads + i);
2352 qemu_mutex_destroy(&decomp_param[i].mutex);
2353 qemu_cond_destroy(&decomp_param[i].cond);
2354 g_free(decomp_param[i].compbuf);
2356 g_free(decompress_threads);
2357 g_free(decomp_param);
2358 decompress_threads = NULL;
2359 decomp_param = NULL;
2362 static void decompress_data_with_multi_threads(QEMUFile *f,
2363 void *host, int len)
2365 int idx, thread_count;
2367 thread_count = migrate_decompress_threads();
2368 qemu_mutex_lock(&decomp_done_lock);
2369 while (true) {
2370 for (idx = 0; idx < thread_count; idx++) {
2371 if (decomp_param[idx].done) {
2372 decomp_param[idx].done = false;
2373 qemu_mutex_lock(&decomp_param[idx].mutex);
2374 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2375 decomp_param[idx].des = host;
2376 decomp_param[idx].len = len;
2377 qemu_cond_signal(&decomp_param[idx].cond);
2378 qemu_mutex_unlock(&decomp_param[idx].mutex);
2379 break;
2382 if (idx < thread_count) {
2383 break;
2384 } else {
2385 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2388 qemu_mutex_unlock(&decomp_done_lock);
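/*
 * Illustrative sketch only (kept under #if 0): the intended call order of
 * the decompression helpers above on the incoming side.  In reality the
 * create/join calls happen once per migration, while the dispatch runs once
 * per RAM_SAVE_FLAG_COMPRESS_PAGE record inside ram_load() below.
 */
#if 0
static void example_decompress_lifecycle(QEMUFile *f, void *host, int len)
{
    migrate_decompress_threads_create();              /* at setup time       */

    decompress_data_with_multi_threads(f, host, len); /* per compressed page */

    wait_for_decompress_done();                       /* end of ram_load()   */
    migrate_decompress_threads_join();                /* at teardown         */
}
#endif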
2392 * Allocate the data structures etc. needed by an incoming migration with
2393 * postcopy-ram; the similarly named postcopy_ram_incoming_init() does the work
2395 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2397 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2399 return postcopy_ram_incoming_init(mis, ram_pages);
2403 * Called in postcopy mode by ram_load().
2404 * rcu_read_lock is taken prior to this being called.
2406 static int ram_load_postcopy(QEMUFile *f)
2408 int flags = 0, ret = 0;
2409 bool place_needed = false;
2410 bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2411 MigrationIncomingState *mis = migration_incoming_get_current();
2412 /* Temporary page that is later 'placed' */
2413 void *postcopy_host_page = postcopy_get_tmp_page(mis);
2414 void *last_host = NULL;
2415 bool all_zero = false;
2417 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2418 ram_addr_t addr;
2419 void *host = NULL;
2420 void *page_buffer = NULL;
2421 void *place_source = NULL;
2422 uint8_t ch;
2424 addr = qemu_get_be64(f);
2425 flags = addr & ~TARGET_PAGE_MASK;
2426 addr &= TARGET_PAGE_MASK;
2428 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2429 place_needed = false;
2430 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2431 RAMBlock *block = ram_block_from_stream(f, flags);
2433 host = host_from_ram_block_offset(block, addr);
2434 if (!host) {
2435 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2436 ret = -EINVAL;
2437 break;
2440 * Postcopy requires that we place whole host pages atomically;
2441 * to make it atomic, the data is read into a temporary page
2442 * that's moved into place later.
2443 * The migration protocol uses possibly smaller target pages;
2444 * however, the source ensures it always sends all the components
2445 * of a host page in order.
2447 page_buffer = postcopy_host_page +
2448 ((uintptr_t)host & ~qemu_host_page_mask);
2449 /* If all target pages turn out to be zero we can optimise the placement */
2450 if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2451 all_zero = true;
2452 } else {
2453 /* not the first target page within the host page */
2454 if (host != (last_host + TARGET_PAGE_SIZE)) {
2455 error_report("Non-sequential target page %p/%p",
2456 host, last_host);
2457 ret = -EINVAL;
2458 break;
2464 * If it's the last part of a host page then we place the host
2465 * page
2467 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2468 ~qemu_host_page_mask) == 0;
2469 place_source = postcopy_host_page;
2471 last_host = host;
2473 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2474 case RAM_SAVE_FLAG_COMPRESS:
2475 ch = qemu_get_byte(f);
2476 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2477 if (ch) {
2478 all_zero = false;
2480 break;
2482 case RAM_SAVE_FLAG_PAGE:
2483 all_zero = false;
2484 if (!place_needed || !matching_page_sizes) {
2485 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2486 } else {
2487 /* Avoid the copy inside qemu_file; postcopy is going
2488  * to copy the page into place later anyway.  We can only do
2489  * this when the read is done in one go (matching page sizes)
2491 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2492 TARGET_PAGE_SIZE);
2494 break;
2495 case RAM_SAVE_FLAG_EOS:
2496 /* normal exit */
2497 break;
2498 default:
2499 error_report("Unknown combination of migration flags: %#x"
2500 " (postcopy mode)", flags);
2501 ret = -EINVAL;
2504 if (place_needed) {
2505 /* We only get here on the last target page of the host page */
2506 if (all_zero) {
2507 ret = postcopy_place_page_zero(mis,
2508 host + TARGET_PAGE_SIZE -
2509 qemu_host_page_size);
2510 } else {
2511 ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
2512 qemu_host_page_size,
2513 place_source);
2516 if (!ret) {
2517 ret = qemu_file_get_error(f);
2521 return ret;
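/*
 * Worked example, not from the original code: host page assembly in
 * ram_load_postcopy() above with hypothetical 4 KiB target pages inside a
 * 64 KiB qemu_host_page_size.  Sixteen consecutive target pages are copied
 * into postcopy_host_page; place_needed only becomes true for the
 * sixteenth, and the whole host page is then placed atomically at
 * (host + TARGET_PAGE_SIZE - qemu_host_page_size), i.e. the start of the page.
 */
#if 0
static void example_place_arithmetic(void)
{
    /* The sixteenth (last) 4 KiB target page of one 64 KiB host page: */
    uint64_t host       = 0x7f0000010000ULL + 15 * 4096;
    bool     place_now  = ((host + 4096) & (65536 - 1)) == 0;  /* true */
    uint64_t place_addr = host + 4096 - 65536;      /* 0x7f0000010000 */

    (void)place_now;
    (void)place_addr;
}
#endif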
2524 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2526 int flags = 0, ret = 0;
2527 static uint64_t seq_iter;
2528 int len = 0;
2530 * If the system is running in postcopy mode, page inserts into host memory
2531 * must be atomic
2533 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2534 /* ADVISE comes earlier; it shows that the source has the postcopy capability enabled */
2535 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2537 seq_iter++;
2539 if (version_id != 4) {
2540 ret = -EINVAL;
2543 /* This RCU critical section can be very long running.
2544 * When RCU reclaims in the code start to become numerous,
2545 * it will be necessary to reduce the granularity of this
2546 * critical section.
2548 rcu_read_lock();
2550 if (postcopy_running) {
2551 ret = ram_load_postcopy(f);
2554 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2555 ram_addr_t addr, total_ram_bytes;
2556 void *host = NULL;
2557 uint8_t ch;
2559 addr = qemu_get_be64(f);
2560 flags = addr & ~TARGET_PAGE_MASK;
2561 addr &= TARGET_PAGE_MASK;
2563 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2564 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2565 RAMBlock *block = ram_block_from_stream(f, flags);
2567 host = host_from_ram_block_offset(block, addr);
2568 if (!host) {
2569 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2570 ret = -EINVAL;
2571 break;
2575 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2576 case RAM_SAVE_FLAG_MEM_SIZE:
2577 /* Synchronize RAM block list */
2578 total_ram_bytes = addr;
2579 while (!ret && total_ram_bytes) {
2580 RAMBlock *block;
2581 char id[256];
2582 ram_addr_t length;
2584 len = qemu_get_byte(f);
2585 qemu_get_buffer(f, (uint8_t *)id, len);
2586 id[len] = 0;
2587 length = qemu_get_be64(f);
2589 block = qemu_ram_block_by_name(id);
2590 if (block) {
2591 if (length != block->used_length) {
2592 Error *local_err = NULL;
2594 ret = qemu_ram_resize(block, length,
2595 &local_err);
2596 if (local_err) {
2597 error_report_err(local_err);
2600 /* For postcopy we need to check that hugepage sizes match */
2601 if (postcopy_advised &&
2602 block->page_size != qemu_host_page_size) {
2603 uint64_t remote_page_size = qemu_get_be64(f);
2604 if (remote_page_size != block->page_size) {
2605 error_report("Mismatched RAM page size %s "
2606 "(local) %zd != %" PRId64,
2607 id, block->page_size,
2608 remote_page_size);
2609 ret = -EINVAL;
2612 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2613 block->idstr);
2614 } else {
2615 error_report("Unknown ramblock \"%s\", cannot "
2616 "accept migration", id);
2617 ret = -EINVAL;
2620 total_ram_bytes -= length;
2622 break;
2624 case RAM_SAVE_FLAG_COMPRESS:
2625 ch = qemu_get_byte(f);
2626 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2627 break;
2629 case RAM_SAVE_FLAG_PAGE:
2630 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2631 break;
2633 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2634 len = qemu_get_be32(f);
2635 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2636 error_report("Invalid compressed data length: %d", len);
2637 ret = -EINVAL;
2638 break;
2640 decompress_data_with_multi_threads(f, host, len);
2641 break;
2643 case RAM_SAVE_FLAG_XBZRLE:
2644 if (load_xbzrle(f, addr, host) < 0) {
2645 error_report("Failed to decompress XBZRLE page at "
2646 RAM_ADDR_FMT, addr);
2647 ret = -EINVAL;
2648 break;
2650 break;
2651 case RAM_SAVE_FLAG_EOS:
2652 /* normal exit */
2653 break;
2654 default:
2655 if (flags & RAM_SAVE_FLAG_HOOK) {
2656 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2657 } else {
2658 error_report("Unknown combination of migration flags: %#x",
2659 flags);
2660 ret = -EINVAL;
2663 if (!ret) {
2664 ret = qemu_file_get_error(f);
2668 wait_for_decompress_done();
2669 rcu_read_unlock();
2670 trace_ram_load_complete(ret, seq_iter);
2671 return ret;
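/*
 * Framing of the precopy stream consumed by ram_load() above.  Every record
 * starts with a be64 whose low bits (below TARGET_PAGE_MASK) carry the
 * RAM_SAVE_FLAG_* bits; for page records the remaining bits carry the offset
 * within the RAMBlock and, unless RAM_SAVE_FLAG_CONTINUE is set, a block
 * name (u8 length + id string) follows.  The payload depends on the flag:
 *
 *   RAM_SAVE_FLAG_MEM_SIZE       RAMBlock list (see ram_save_setup() above)
 *   RAM_SAVE_FLAG_COMPRESS       u8 fill byte for the whole target page
 *   RAM_SAVE_FLAG_PAGE           TARGET_PAGE_SIZE raw bytes
 *   RAM_SAVE_FLAG_COMPRESS_PAGE  be32 length + zlib-compressed page data
 *   RAM_SAVE_FLAG_XBZRLE         XBZRLE record (see load_xbzrle() above)
 *   RAM_SAVE_FLAG_HOOK           handed to ram_control_load_hook()
 *   RAM_SAVE_FLAG_EOS            end of this section, no payload
 */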
2674 static SaveVMHandlers savevm_ram_handlers = {
2675 .save_live_setup = ram_save_setup,
2676 .save_live_iterate = ram_save_iterate,
2677 .save_live_complete_postcopy = ram_save_complete,
2678 .save_live_complete_precopy = ram_save_complete,
2679 .save_live_pending = ram_save_pending,
2680 .load_state = ram_load,
2681 .cleanup = ram_migration_cleanup,
2684 void ram_mig_init(void)
2686 qemu_mutex_init(&XBZRLE.lock);
2687 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);