Postcopy: Maintain unsentmap
[qemu.git] / migration / ram.c
blob c703176c747908a727736f27b26384b85ac69b03
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
28 #include <stdint.h>
29 #include <zlib.h>
30 #include "qemu/bitops.h"
31 #include "qemu/bitmap.h"
32 #include "qemu/timer.h"
33 #include "qemu/main-loop.h"
34 #include "migration/migration.h"
35 #include "exec/address-spaces.h"
36 #include "migration/page_cache.h"
37 #include "qemu/error-report.h"
38 #include "trace.h"
39 #include "exec/ram_addr.h"
40 #include "qemu/rcu_queue.h"
42 #ifdef DEBUG_MIGRATION_RAM
43 #define DPRINTF(fmt, ...) \
44 do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define DPRINTF(fmt, ...) \
47 do { } while (0)
48 #endif
50 static int dirty_rate_high_cnt;
52 static uint64_t bitmap_sync_count;
54 /***********************************************************/
55 /* ram save/restore */
57 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
58 #define RAM_SAVE_FLAG_COMPRESS 0x02
59 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
60 #define RAM_SAVE_FLAG_PAGE 0x08
61 #define RAM_SAVE_FLAG_EOS 0x10
62 #define RAM_SAVE_FLAG_CONTINUE 0x20
63 #define RAM_SAVE_FLAG_XBZRLE 0x40
64 /* 0x80 is reserved in migration.h; start with 0x100 next */
65 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
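/* On the wire, each page record starts with a be64 word that packs the
 * page-aligned offset together with one or more of the flags above in
 * its low bits (the offset is always TARGET_PAGE_SIZE aligned, so those
 * bits are free).  A sketch of how ram_load() below pulls them apart:
 *
 *     addr  = qemu_get_be64(f);
 *     flags = addr & ~TARGET_PAGE_MASK;
 *     addr &= TARGET_PAGE_MASK;
 */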
67 static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
69 static inline bool is_zero_range(uint8_t *p, uint64_t size)
71 return buffer_find_nonzero_offset(p, size) == size;
74 /* This struct contains the XBZRLE cache and a static page
75 used by the compression */
76 static struct {
77 /* buffer used for XBZRLE encoding */
78 uint8_t *encoded_buf;
79 /* buffer for storing page content */
80 uint8_t *current_buf;
81 /* Cache for XBZRLE, Protected by lock. */
82 PageCache *cache;
83 QemuMutex lock;
84 } XBZRLE;
86 /* buffer used for XBZRLE decoding */
87 static uint8_t *xbzrle_decoded_buf;
89 static void XBZRLE_cache_lock(void)
91 if (migrate_use_xbzrle())
92 qemu_mutex_lock(&XBZRLE.lock);
95 static void XBZRLE_cache_unlock(void)
97 if (migrate_use_xbzrle())
98 qemu_mutex_unlock(&XBZRLE.lock);
102 * called from qmp_migrate_set_cache_size in main thread, possibly while
103 * a migration is in progress.
104 * A running migration may be using the cache and might finish during this
105 * call, hence changes to the cache are protected by XBZRLE.lock.
107 int64_t xbzrle_cache_resize(int64_t new_size)
109 PageCache *new_cache;
110 int64_t ret;
112 if (new_size < TARGET_PAGE_SIZE) {
113 return -1;
116 XBZRLE_cache_lock();
118 if (XBZRLE.cache != NULL) {
119 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
120 goto out_new_size;
122 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
123 TARGET_PAGE_SIZE);
124 if (!new_cache) {
125 error_report("Error creating cache");
126 ret = -1;
127 goto out;
130 cache_fini(XBZRLE.cache);
131 XBZRLE.cache = new_cache;
134 out_new_size:
135 ret = pow2floor(new_size);
136 out:
137 XBZRLE_cache_unlock();
138 return ret;
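/* Usage sketch (hypothetical caller): the size actually put into effect
 * is pow2floor() of the request, and is returned so it can be reported:
 *
 *     int64_t eff = xbzrle_cache_resize(5 * 1024 * 1024);
 *     // eff is 4MB (rounded down to a power of two), or -1 on error
 */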
141 /* accounting for migration statistics */
142 typedef struct AccountingInfo {
143 uint64_t dup_pages;
144 uint64_t skipped_pages;
145 uint64_t norm_pages;
146 uint64_t iterations;
147 uint64_t xbzrle_bytes;
148 uint64_t xbzrle_pages;
149 uint64_t xbzrle_cache_miss;
150 double xbzrle_cache_miss_rate;
151 uint64_t xbzrle_overflows;
152 } AccountingInfo;
154 static AccountingInfo acct_info;
156 static void acct_clear(void)
158 memset(&acct_info, 0, sizeof(acct_info));
161 uint64_t dup_mig_bytes_transferred(void)
163 return acct_info.dup_pages * TARGET_PAGE_SIZE;
166 uint64_t dup_mig_pages_transferred(void)
168 return acct_info.dup_pages;
171 uint64_t skipped_mig_bytes_transferred(void)
173 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
176 uint64_t skipped_mig_pages_transferred(void)
178 return acct_info.skipped_pages;
181 uint64_t norm_mig_bytes_transferred(void)
183 return acct_info.norm_pages * TARGET_PAGE_SIZE;
186 uint64_t norm_mig_pages_transferred(void)
188 return acct_info.norm_pages;
191 uint64_t xbzrle_mig_bytes_transferred(void)
193 return acct_info.xbzrle_bytes;
196 uint64_t xbzrle_mig_pages_transferred(void)
198 return acct_info.xbzrle_pages;
201 uint64_t xbzrle_mig_pages_cache_miss(void)
203 return acct_info.xbzrle_cache_miss;
206 double xbzrle_mig_cache_miss_rate(void)
208 return acct_info.xbzrle_cache_miss_rate;
211 uint64_t xbzrle_mig_pages_overflow(void)
213 return acct_info.xbzrle_overflows;
216 /* This is the last block that we have visited searching for dirty pages
218 static RAMBlock *last_seen_block;
219 /* This is the last block from where we have sent data */
220 static RAMBlock *last_sent_block;
221 static ram_addr_t last_offset;
222 static QemuMutex migration_bitmap_mutex;
223 static uint64_t migration_dirty_pages;
224 static uint32_t last_version;
225 static bool ram_bulk_stage;
227 /* used by the search for pages to send */
228 struct PageSearchStatus {
229 /* Current block being searched */
230 RAMBlock *block;
231 /* Current offset to search from */
232 ram_addr_t offset;
233 /* Set once we wrap around */
234 bool complete_round;
236 typedef struct PageSearchStatus PageSearchStatus;
238 static struct BitmapRcu {
239 struct rcu_head rcu;
240 /* Main migration bitmap */
241 unsigned long *bmap;
242 * bitmap of pages that haven't been sent even once;
243 * only maintained and used in postcopy at the moment,
244 * where it's used to send the dirtymap at the start
245 * of the postcopy phase
247 unsigned long *unsentmap;
248 } *migration_bitmap_rcu;
250 struct CompressParam {
251 bool start;
252 bool done;
253 QEMUFile *file;
254 QemuMutex mutex;
255 QemuCond cond;
256 RAMBlock *block;
257 ram_addr_t offset;
259 typedef struct CompressParam CompressParam;
261 struct DecompressParam {
262 bool start;
263 QemuMutex mutex;
264 QemuCond cond;
265 void *des;
266 uint8_t *compbuf;
267 int len;
269 typedef struct DecompressParam DecompressParam;
271 static CompressParam *comp_param;
272 static QemuThread *compress_threads;
273 /* comp_done_cond is used to wake up the migration thread when
274 * one of the compression threads has finished the compression.
275 * comp_done_lock is used together with comp_done_cond.
277 static QemuMutex *comp_done_lock;
278 static QemuCond *comp_done_cond;
279 /* The empty QEMUFileOps will be used by file in CompressParam */
280 static const QEMUFileOps empty_ops = { };
282 static bool compression_switch;
283 static bool quit_comp_thread;
284 static bool quit_decomp_thread;
285 static DecompressParam *decomp_param;
286 static QemuThread *decompress_threads;
287 static uint8_t *compressed_data_buf;
289 static int do_compress_ram_page(CompressParam *param);
291 static void *do_data_compress(void *opaque)
293 CompressParam *param = opaque;
295 while (!quit_comp_thread) {
296 qemu_mutex_lock(&param->mutex);
297 /* Re-check quit_comp_thread in case
298 * terminate_compression_threads() was called just before
299 * qemu_mutex_lock(&param->mutex) and after
300 * while (!quit_comp_thread); re-checking it here makes
301 * sure the compression thread terminates as expected.
303 while (!param->start && !quit_comp_thread) {
304 qemu_cond_wait(&param->cond, &param->mutex);
306 if (!quit_comp_thread) {
307 do_compress_ram_page(param);
309 param->start = false;
310 qemu_mutex_unlock(&param->mutex);
312 qemu_mutex_lock(comp_done_lock);
313 param->done = true;
314 qemu_cond_signal(comp_done_cond);
315 qemu_mutex_unlock(comp_done_lock);
318 return NULL;
321 static inline void terminate_compression_threads(void)
323 int idx, thread_count;
325 thread_count = migrate_compress_threads();
326 quit_comp_thread = true;
327 for (idx = 0; idx < thread_count; idx++) {
328 qemu_mutex_lock(&comp_param[idx].mutex);
329 qemu_cond_signal(&comp_param[idx].cond);
330 qemu_mutex_unlock(&comp_param[idx].mutex);
334 void migrate_compress_threads_join(void)
336 int i, thread_count;
338 if (!migrate_use_compression()) {
339 return;
341 terminate_compression_threads();
342 thread_count = migrate_compress_threads();
343 for (i = 0; i < thread_count; i++) {
344 qemu_thread_join(compress_threads + i);
345 qemu_fclose(comp_param[i].file);
346 qemu_mutex_destroy(&comp_param[i].mutex);
347 qemu_cond_destroy(&comp_param[i].cond);
349 qemu_mutex_destroy(comp_done_lock);
350 qemu_cond_destroy(comp_done_cond);
351 g_free(compress_threads);
352 g_free(comp_param);
353 g_free(comp_done_cond);
354 g_free(comp_done_lock);
355 compress_threads = NULL;
356 comp_param = NULL;
357 comp_done_cond = NULL;
358 comp_done_lock = NULL;
361 void migrate_compress_threads_create(void)
363 int i, thread_count;
365 if (!migrate_use_compression()) {
366 return;
368 quit_comp_thread = false;
369 compression_switch = true;
370 thread_count = migrate_compress_threads();
371 compress_threads = g_new0(QemuThread, thread_count);
372 comp_param = g_new0(CompressParam, thread_count);
373 comp_done_cond = g_new0(QemuCond, 1);
374 comp_done_lock = g_new0(QemuMutex, 1);
375 qemu_cond_init(comp_done_cond);
376 qemu_mutex_init(comp_done_lock);
377 for (i = 0; i < thread_count; i++) {
378 /* comp_param[i].file is just used as a dummy buffer to save data; set
379 * its ops to empty.
381 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
382 comp_param[i].done = true;
383 qemu_mutex_init(&comp_param[i].mutex);
384 qemu_cond_init(&comp_param[i].cond);
385 qemu_thread_create(compress_threads + i, "compress",
386 do_data_compress, comp_param + i,
387 QEMU_THREAD_JOINABLE);
392 * save_page_header: Write page header to wire
394 * If this is the 1st block, it also writes the block identification
396 * Returns: Number of bytes written
398 * @f: QEMUFile where to send the data
399 * @block: block that contains the page we want to send
400 * @offset: offset inside the block for the page
401 * in the lower bits, it contains flags
403 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
405 size_t size, len;
407 qemu_put_be64(f, offset);
408 size = 8;
410 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
411 len = strlen(block->idstr);
412 qemu_put_byte(f, len);
413 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
414 size += 1 + len;
416 return size;
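/* Resulting header layout (sketch):
 *   be64       offset | flags
 *   u8 len + len bytes of idstr, but only when RAM_SAVE_FLAG_CONTINUE
 *              is not set, i.e. for the first page sent from a new block
 */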
419 /* Reduce amount of guest cpu execution to hopefully slow down memory writes.
420 * If guest dirty memory rate is reduced below the rate at which we can
421 * transfer pages to the destination then we should be able to complete
422 * migration. Some workloads dirty memory way too fast and will not effectively
423 * converge, even with auto-converge.
425 static void mig_throttle_guest_down(void)
427 MigrationState *s = migrate_get_current();
428 uint64_t pct_initial =
429 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL];
430 uint64_t pct_increment =
431 s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT];
433 /* We have not started throttling yet. Let's start it. */
434 if (!cpu_throttle_active()) {
435 cpu_throttle_set(pct_initial);
436 } else {
437 /* Throttling already on, just increase the rate */
438 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
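/* For example, with hypothetical parameter values of 20 (initial) and
 * 10 (increment), successive calls throttle the guest CPUs at
 * 20%, 30%, 40%, ... until the dirty rate drops enough to converge. */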
442 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
443 * The important thing is that a stale (not-yet-0'd) page be replaced
444 * by the new data.
445 * As a bonus, if the page wasn't in the cache it gets added so that
446 * when a small write is made into the 0'd page it gets XBZRLE sent
448 static void xbzrle_cache_zero_page(ram_addr_t current_addr)
450 if (ram_bulk_stage || !migrate_use_xbzrle()) {
451 return;
454 /* We don't care if this fails to allocate a new cache page
455 * as long as it updated an old one */
456 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
457 bitmap_sync_count);
460 #define ENCODING_FLAG_XBZRLE 0x1
463 * save_xbzrle_page: compress and send current page
465 * Returns: 1 means that we wrote the page
466 * 0 means that page is identical to the one already sent
467 * -1 means that xbzrle would be longer than normal
469 * @f: QEMUFile where to send the data
470 * @current_data:
471 * @current_addr:
472 * @block: block that contains the page we want to send
473 * @offset: offset inside the block for the page
474 * @last_stage: if we are at the completion stage
475 * @bytes_transferred: increase it with the number of transferred bytes
477 static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
478 ram_addr_t current_addr, RAMBlock *block,
479 ram_addr_t offset, bool last_stage,
480 uint64_t *bytes_transferred)
482 int encoded_len = 0, bytes_xbzrle;
483 uint8_t *prev_cached_page;
485 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
486 acct_info.xbzrle_cache_miss++;
487 if (!last_stage) {
488 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
489 bitmap_sync_count) == -1) {
490 return -1;
491 } else {
492 /* update *current_data when the page has been
493 inserted into cache */
494 *current_data = get_cached_data(XBZRLE.cache, current_addr);
497 return -1;
500 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
502 /* save current buffer into memory */
503 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
505 /* XBZRLE encoding (if there is no overflow) */
506 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
507 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
508 TARGET_PAGE_SIZE);
509 if (encoded_len == 0) {
510 DPRINTF("Skipping unmodified page\n");
511 return 0;
512 } else if (encoded_len == -1) {
513 DPRINTF("Overflow\n");
514 acct_info.xbzrle_overflows++;
515 /* update data in the cache */
516 if (!last_stage) {
517 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
518 *current_data = prev_cached_page;
520 return -1;
523 /* we need to update the data in the cache, in order to get the same data */
524 if (!last_stage) {
525 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
528 /* Send XBZRLE based compressed page */
529 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
530 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
531 qemu_put_be16(f, encoded_len);
532 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
533 bytes_xbzrle += encoded_len + 1 + 2;
534 acct_info.xbzrle_pages++;
535 acct_info.xbzrle_bytes += bytes_xbzrle;
536 *bytes_transferred += bytes_xbzrle;
538 return 1;
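/* The XBZRLE record written above is therefore:
 *   page header (with RAM_SAVE_FLAG_XBZRLE), u8 ENCODING_FLAG_XBZRLE,
 *   be16 encoded_len, then encoded_len bytes of encoded data,
 * which is exactly what load_xbzrle() expects on the destination. */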
541 /* Called with rcu_read_lock() to protect migration_bitmap
542 * rb: The RAMBlock to search for dirty pages in
543 * start: Start address (typically so we can continue from previous page)
544 * ram_addr_abs: Pointer into which to store the address of the dirty page
545 * within the global ram_addr space
547 * Returns: byte offset within memory region of the start of a dirty page
549 static inline
550 ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb,
551 ram_addr_t start,
552 ram_addr_t *ram_addr_abs)
554 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
555 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
556 uint64_t rb_size = rb->used_length;
557 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
558 unsigned long *bitmap;
560 unsigned long next;
562 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
563 if (ram_bulk_stage && nr > base) {
564 next = nr + 1;
565 } else {
566 next = find_next_bit(bitmap, size, nr);
569 if (next < size) {
570 clear_bit(next, bitmap);
571 migration_dirty_pages--;
573 *ram_addr_abs = next << TARGET_PAGE_BITS;
574 return (next - base) << TARGET_PAGE_BITS;
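/* Worked example (illustrative, assuming 4K target pages): for a block
 * at rb->offset 0x40000000 with a dirty page at block offset 0x3000 and
 * start == 0, base is 0x40000, the bit found is 0x40003, *ram_addr_abs
 * becomes 0x40003000 and the function returns 0x3000. */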
577 /* Called with rcu_read_lock() to protect migration_bitmap */
578 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
580 unsigned long *bitmap;
581 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
582 migration_dirty_pages +=
583 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
586 /* Fix me: there are too many global variables used in the migration process. */
587 static int64_t start_time;
588 static int64_t bytes_xfer_prev;
589 static int64_t num_dirty_pages_period;
590 static uint64_t xbzrle_cache_miss_prev;
591 static uint64_t iterations_prev;
593 static void migration_bitmap_sync_init(void)
595 start_time = 0;
596 bytes_xfer_prev = 0;
597 num_dirty_pages_period = 0;
598 xbzrle_cache_miss_prev = 0;
599 iterations_prev = 0;
602 /* Called with iothread lock held, to protect ram_list.dirty_memory[] */
603 static void migration_bitmap_sync(void)
605 RAMBlock *block;
606 uint64_t num_dirty_pages_init = migration_dirty_pages;
607 MigrationState *s = migrate_get_current();
608 int64_t end_time;
609 int64_t bytes_xfer_now;
611 bitmap_sync_count++;
613 if (!bytes_xfer_prev) {
614 bytes_xfer_prev = ram_bytes_transferred();
617 if (!start_time) {
618 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
621 trace_migration_bitmap_sync_start();
622 address_space_sync_dirty_bitmap(&address_space_memory);
624 qemu_mutex_lock(&migration_bitmap_mutex);
625 rcu_read_lock();
626 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
627 migration_bitmap_sync_range(block->offset, block->used_length);
629 rcu_read_unlock();
630 qemu_mutex_unlock(&migration_bitmap_mutex);
632 trace_migration_bitmap_sync_end(migration_dirty_pages
633 - num_dirty_pages_init);
634 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
635 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
637 /* more than 1 second = 1000 milliseconds */
638 if (end_time > start_time + 1000) {
639 if (migrate_auto_converge()) {
640 /* The following detection logic can be refined later. For now:
641 check to see if the bytes dirtied in this period exceed 50% of the
642 approx. amount of bytes that just got transferred since the last time
643 we were in this routine. If that happens twice, start or increase
644 throttling */
645 bytes_xfer_now = ram_bytes_transferred();
647 if (s->dirty_pages_rate &&
648 (num_dirty_pages_period * TARGET_PAGE_SIZE >
649 (bytes_xfer_now - bytes_xfer_prev)/2) &&
650 (dirty_rate_high_cnt++ >= 2)) {
651 trace_migration_throttle();
652 dirty_rate_high_cnt = 0;
653 mig_throttle_guest_down();
655 bytes_xfer_prev = bytes_xfer_now;
658 if (migrate_use_xbzrle()) {
659 if (iterations_prev != acct_info.iterations) {
660 acct_info.xbzrle_cache_miss_rate =
661 (double)(acct_info.xbzrle_cache_miss -
662 xbzrle_cache_miss_prev) /
663 (acct_info.iterations - iterations_prev);
665 iterations_prev = acct_info.iterations;
666 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
668 s->dirty_pages_rate = num_dirty_pages_period * 1000
669 / (end_time - start_time);
670 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
671 start_time = end_time;
672 num_dirty_pages_period = 0;
674 s->dirty_sync_count = bitmap_sync_count;
678 * save_zero_page: Send the zero page to the stream
680 * Returns: Number of pages written.
682 * @f: QEMUFile where to send the data
683 * @block: block that contains the page we want to send
684 * @offset: offset inside the block for the page
685 * @p: pointer to the page
686 * @bytes_transferred: increase it with the number of transferred bytes
688 static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
689 uint8_t *p, uint64_t *bytes_transferred)
691 int pages = -1;
693 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
694 acct_info.dup_pages++;
695 *bytes_transferred += save_page_header(f, block,
696 offset | RAM_SAVE_FLAG_COMPRESS);
697 qemu_put_byte(f, 0);
698 *bytes_transferred += 1;
699 pages = 1;
702 return pages;
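/* A zero page is thus sent as just a page header carrying
 * RAM_SAVE_FLAG_COMPRESS plus a single fill byte of 0; on the
 * destination, ram_handle_compressed() turns that back into a whole
 * zeroed page without transferring TARGET_PAGE_SIZE bytes. */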
706 * ram_save_page: Send the given page to the stream
708 * Returns: Number of pages written.
710 * @f: QEMUFile where to send the data
711 * @block: block that contains the page we want to send
712 * @offset: offset inside the block for the page
713 * @last_stage: if we are at the completion stage
714 * @bytes_transferred: increase it with the number of transferred bytes
716 static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
717 bool last_stage, uint64_t *bytes_transferred)
719 int pages = -1;
720 uint64_t bytes_xmit;
721 ram_addr_t current_addr;
722 uint8_t *p;
723 int ret;
724 bool send_async = true;
726 p = block->host + offset;
728 /* If in doubt, send the page as a normal page */
729 bytes_xmit = 0;
730 ret = ram_control_save_page(f, block->offset,
731 offset, TARGET_PAGE_SIZE, &bytes_xmit);
732 if (bytes_xmit) {
733 *bytes_transferred += bytes_xmit;
734 pages = 1;
737 XBZRLE_cache_lock();
739 current_addr = block->offset + offset;
741 if (block == last_sent_block) {
742 offset |= RAM_SAVE_FLAG_CONTINUE;
744 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
745 if (ret != RAM_SAVE_CONTROL_DELAYED) {
746 if (bytes_xmit > 0) {
747 acct_info.norm_pages++;
748 } else if (bytes_xmit == 0) {
749 acct_info.dup_pages++;
752 } else {
753 pages = save_zero_page(f, block, offset, p, bytes_transferred);
754 if (pages > 0) {
755 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
756 * page would be stale
758 xbzrle_cache_zero_page(current_addr);
759 } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
760 pages = save_xbzrle_page(f, &p, current_addr, block,
761 offset, last_stage, bytes_transferred);
762 if (!last_stage) {
763 /* Can't send this cached data async, since the cache page
764 * might get updated before it gets to the wire
766 send_async = false;
771 /* XBZRLE overflow or normal page */
772 if (pages == -1) {
773 *bytes_transferred += save_page_header(f, block,
774 offset | RAM_SAVE_FLAG_PAGE);
775 if (send_async) {
776 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
777 } else {
778 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
780 *bytes_transferred += TARGET_PAGE_SIZE;
781 pages = 1;
782 acct_info.norm_pages++;
785 XBZRLE_cache_unlock();
787 return pages;
790 static int do_compress_ram_page(CompressParam *param)
792 int bytes_sent, blen;
793 uint8_t *p;
794 RAMBlock *block = param->block;
795 ram_addr_t offset = param->offset;
797 p = block->host + (offset & TARGET_PAGE_MASK);
799 bytes_sent = save_page_header(param->file, block, offset |
800 RAM_SAVE_FLAG_COMPRESS_PAGE);
801 blen = qemu_put_compression_data(param->file, p, TARGET_PAGE_SIZE,
802 migrate_compress_level());
803 bytes_sent += blen;
805 return bytes_sent;
808 static inline void start_compression(CompressParam *param)
810 param->done = false;
811 qemu_mutex_lock(&param->mutex);
812 param->start = true;
813 qemu_cond_signal(&param->cond);
814 qemu_mutex_unlock(&param->mutex);
817 static inline void start_decompression(DecompressParam *param)
819 qemu_mutex_lock(&param->mutex);
820 param->start = true;
821 qemu_cond_signal(&param->cond);
822 qemu_mutex_unlock(&param->mutex);
825 static uint64_t bytes_transferred;
827 static void flush_compressed_data(QEMUFile *f)
829 int idx, len, thread_count;
831 if (!migrate_use_compression()) {
832 return;
834 thread_count = migrate_compress_threads();
835 for (idx = 0; idx < thread_count; idx++) {
836 if (!comp_param[idx].done) {
837 qemu_mutex_lock(comp_done_lock);
838 while (!comp_param[idx].done && !quit_comp_thread) {
839 qemu_cond_wait(comp_done_cond, comp_done_lock);
841 qemu_mutex_unlock(comp_done_lock);
843 if (!quit_comp_thread) {
844 len = qemu_put_qemu_file(f, comp_param[idx].file);
845 bytes_transferred += len;
850 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
851 ram_addr_t offset)
853 param->block = block;
854 param->offset = offset;
857 static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
858 ram_addr_t offset,
859 uint64_t *bytes_transferred)
861 int idx, thread_count, bytes_xmit = -1, pages = -1;
863 thread_count = migrate_compress_threads();
864 qemu_mutex_lock(comp_done_lock);
865 while (true) {
866 for (idx = 0; idx < thread_count; idx++) {
867 if (comp_param[idx].done) {
868 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
869 set_compress_params(&comp_param[idx], block, offset);
870 start_compression(&comp_param[idx]);
871 pages = 1;
872 acct_info.norm_pages++;
873 *bytes_transferred += bytes_xmit;
874 break;
877 if (pages > 0) {
878 break;
879 } else {
880 qemu_cond_wait(comp_done_cond, comp_done_lock);
883 qemu_mutex_unlock(comp_done_lock);
885 return pages;
889 * ram_save_compressed_page: compress the given page and send it to the stream
891 * Returns: Number of pages written.
893 * @f: QEMUFile where to send the data
894 * @block: block that contains the page we want to send
895 * @offset: offset inside the block for the page
896 * @last_stage: if we are at the completion stage
897 * @bytes_transferred: increase it with the number of transferred bytes
899 static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
900 ram_addr_t offset, bool last_stage,
901 uint64_t *bytes_transferred)
903 int pages = -1;
904 uint64_t bytes_xmit;
905 uint8_t *p;
906 int ret;
908 p = block->host + offset;
910 bytes_xmit = 0;
911 ret = ram_control_save_page(f, block->offset,
912 offset, TARGET_PAGE_SIZE, &bytes_xmit);
913 if (bytes_xmit) {
914 *bytes_transferred += bytes_xmit;
915 pages = 1;
917 if (block == last_sent_block) {
918 offset |= RAM_SAVE_FLAG_CONTINUE;
920 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
921 if (ret != RAM_SAVE_CONTROL_DELAYED) {
922 if (bytes_xmit > 0) {
923 acct_info.norm_pages++;
924 } else if (bytes_xmit == 0) {
925 acct_info.dup_pages++;
928 } else {
929 /* When starting the process of a new block, the first page of
930 * the block should be sent out before other pages in the same
931 * block, and all the pages in the last block should have been sent
932 * out. Keeping this order is important, because the 'cont' flag
933 * is used to avoid resending the block name.
935 if (block != last_sent_block) {
936 flush_compressed_data(f);
937 pages = save_zero_page(f, block, offset, p, bytes_transferred);
938 if (pages == -1) {
939 set_compress_params(&comp_param[0], block, offset);
940 /* Use the qemu thread to compress the data to make sure the
941 * first page is sent out before other pages
943 bytes_xmit = do_compress_ram_page(&comp_param[0]);
944 acct_info.norm_pages++;
945 qemu_put_qemu_file(f, comp_param[0].file);
946 *bytes_transferred += bytes_xmit;
947 pages = 1;
949 } else {
950 pages = save_zero_page(f, block, offset, p, bytes_transferred);
951 if (pages == -1) {
952 pages = compress_page_with_multi_thread(f, block, offset,
953 bytes_transferred);
958 return pages;
962 * Find the next dirty page and update any state associated with
963 * the search process.
965 * Returns: True if a page is found
967 * @f: Current migration stream.
968 * @pss: Data about the state of the current dirty page scan.
969 * @*again: Set to false if the search has scanned the whole of RAM
971 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
972 bool *again, ram_addr_t *ram_addr_abs)
974 pss->offset = migration_bitmap_find_and_reset_dirty(pss->block,
975 pss->offset,
976 ram_addr_abs);
977 if (pss->complete_round && pss->block == last_seen_block &&
978 pss->offset >= last_offset) {
980 * We've been once around the RAM and haven't found anything.
981 * Give up.
983 *again = false;
984 return false;
986 if (pss->offset >= pss->block->used_length) {
987 /* Didn't find anything in this RAM Block */
988 pss->offset = 0;
989 pss->block = QLIST_NEXT_RCU(pss->block, next);
990 if (!pss->block) {
991 /* Hit the end of the list */
992 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
993 /* Flag that we've looped */
994 pss->complete_round = true;
995 ram_bulk_stage = false;
996 if (migrate_use_xbzrle()) {
997 /* If xbzrle is on, stop using the data compression at this
998 * point. In theory, xbzrle can do better than compression.
1000 flush_compressed_data(f);
1001 compression_switch = false;
1004 /* Didn't find anything this time, but try again on the new block */
1005 *again = true;
1006 return false;
1007 } else {
1008 /* Can go around again, but... */
1009 *again = true;
1010 /* We've found something so probably don't need to */
1011 return true;
1016 * ram_find_and_save_block: Finds a dirty page and sends it to f
1018 * Called within an RCU critical section.
1020 * Returns: The number of pages written
1021 * 0 means no dirty pages
1023 * @f: QEMUFile where to send the data
1024 * @last_stage: if we are at the completion stage
1025 * @bytes_transferred: increase it with the number of transferred bytes
1028 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1029 uint64_t *bytes_transferred)
1031 PageSearchStatus pss;
1032 int pages = 0;
1033 bool again, found;
1034 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1035 ram_addr_t space */
1037 pss.block = last_seen_block;
1038 pss.offset = last_offset;
1039 pss.complete_round = false;
1041 if (!pss.block) {
1042 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1045 do {
1046 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1048 if (found) {
1049 if (compression_switch && migrate_use_compression()) {
1050 pages = ram_save_compressed_page(f, pss.block, pss.offset,
1051 last_stage,
1052 bytes_transferred);
1053 } else {
1054 pages = ram_save_page(f, pss.block, pss.offset, last_stage,
1055 bytes_transferred);
1058 /* if page is unmodified, continue to the next */
1059 if (pages > 0) {
1060 unsigned long *unsentmap;
1062 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1063 last_sent_block = pss.block;
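/* Postcopy keeps a map of pages that have never been sent; now that
 * this page has been put on the wire, clear its bit. */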
1064 if (unsentmap) {
1065 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1070 } while (!pages && again);
1072 last_seen_block = pss.block;
1073 last_offset = pss.offset;
1075 return pages;
1078 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1080 uint64_t pages = size / TARGET_PAGE_SIZE;
1081 if (zero) {
1082 acct_info.dup_pages += pages;
1083 } else {
1084 acct_info.norm_pages += pages;
1085 bytes_transferred += size;
1086 qemu_update_position(f, size);
1090 static ram_addr_t ram_save_remaining(void)
1092 return migration_dirty_pages;
1095 uint64_t ram_bytes_remaining(void)
1097 return ram_save_remaining() * TARGET_PAGE_SIZE;
1100 uint64_t ram_bytes_transferred(void)
1102 return bytes_transferred;
1105 uint64_t ram_bytes_total(void)
1107 RAMBlock *block;
1108 uint64_t total = 0;
1110 rcu_read_lock();
1111 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1112 total += block->used_length;
1113 rcu_read_unlock();
1114 return total;
1117 void free_xbzrle_decoded_buf(void)
1119 g_free(xbzrle_decoded_buf);
1120 xbzrle_decoded_buf = NULL;
1123 static void migration_bitmap_free(struct BitmapRcu *bmap)
1125 g_free(bmap->bmap);
1126 g_free(bmap->unsentmap);
1127 g_free(bmap);
1130 static void ram_migration_cleanup(void *opaque)
1132 /* The caller holds the iothread lock or is in a bh, so there is
1133 * no write race against this migration_bitmap
1135 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1136 atomic_rcu_set(&migration_bitmap_rcu, NULL);
1137 if (bitmap) {
1138 memory_global_dirty_log_stop();
1139 call_rcu(bitmap, migration_bitmap_free, rcu);
1142 XBZRLE_cache_lock();
1143 if (XBZRLE.cache) {
1144 cache_fini(XBZRLE.cache);
1145 g_free(XBZRLE.encoded_buf);
1146 g_free(XBZRLE.current_buf);
1147 XBZRLE.cache = NULL;
1148 XBZRLE.encoded_buf = NULL;
1149 XBZRLE.current_buf = NULL;
1151 XBZRLE_cache_unlock();
1154 static void reset_ram_globals(void)
1156 last_seen_block = NULL;
1157 last_sent_block = NULL;
1158 last_offset = 0;
1159 last_version = ram_list.version;
1160 ram_bulk_stage = true;
1163 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1165 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1167 /* called in the qemu main thread, so there is
1168 * no write race against this migration_bitmap
1170 if (migration_bitmap_rcu) {
1171 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1172 bitmap = g_new(struct BitmapRcu, 1);
1173 bitmap->bmap = bitmap_new(new);
1175 /* prevent bits in the migration bitmap from being set
1176 * by migration_bitmap_sync_range() at the same time;
1177 * it is safe for migration if a migration_bitmap bit is cleared
1178 * at the same time.
1180 qemu_mutex_lock(&migration_bitmap_mutex);
1181 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1182 bitmap_set(bitmap->bmap, old, new - old);
1184 /* We don't have a way to safely extend the unsentmap
1185 * with RCU; so mark it as missing, and entry to postcopy
1186 * will fail.
1188 bitmap->unsentmap = NULL;
1190 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1191 qemu_mutex_unlock(&migration_bitmap_mutex);
1192 migration_dirty_pages += new - old;
1193 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1198 * 'expected' is the value you expect the bitmap mostly to be full
1199 * of; it won't bother printing lines that are all this value.
1200 * If 'todump' is null the migration bitmap is dumped.
1202 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1204 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1206 int64_t cur;
1207 int64_t linelen = 128;
1208 char linebuf[129];
1210 if (!todump) {
1211 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1214 for (cur = 0; cur < ram_pages; cur += linelen) {
1215 int64_t curb;
1216 bool found = false;
1218 * Last line; catch the case where the line length
1219 * is longer than remaining ram
1221 if (cur + linelen > ram_pages) {
1222 linelen = ram_pages - cur;
1224 for (curb = 0; curb < linelen; curb++) {
1225 bool thisbit = test_bit(cur + curb, todump);
1226 linebuf[curb] = thisbit ? '1' : '.';
1227 found = found || (thisbit != expected);
1229 if (found) {
1230 linebuf[curb] = '\0';
1231 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
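/* Illustrative output: one line per run of 128 pages that differs from
 * 'expected', e.g.
 *     0x00000080 : ..........1...1................
 */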
1236 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has a
1237 * long-running RCU critical section. When RCU reclaims in the code
1238 * start to become numerous, it will be necessary to reduce the
1239 * granularity of these critical sections.
1242 static int ram_save_setup(QEMUFile *f, void *opaque)
1244 RAMBlock *block;
1245 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1247 dirty_rate_high_cnt = 0;
1248 bitmap_sync_count = 0;
1249 migration_bitmap_sync_init();
1250 qemu_mutex_init(&migration_bitmap_mutex);
1252 if (migrate_use_xbzrle()) {
1253 XBZRLE_cache_lock();
1254 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1255 TARGET_PAGE_SIZE,
1256 TARGET_PAGE_SIZE);
1257 if (!XBZRLE.cache) {
1258 XBZRLE_cache_unlock();
1259 error_report("Error creating cache");
1260 return -1;
1262 XBZRLE_cache_unlock();
1264 /* We prefer not to abort if there is no memory */
1265 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1266 if (!XBZRLE.encoded_buf) {
1267 error_report("Error allocating encoded_buf");
1268 return -1;
1271 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1272 if (!XBZRLE.current_buf) {
1273 error_report("Error allocating current_buf");
1274 g_free(XBZRLE.encoded_buf);
1275 XBZRLE.encoded_buf = NULL;
1276 return -1;
1279 acct_clear();
1282 /* iothread lock needed for ram_list.dirty_memory[] */
1283 qemu_mutex_lock_iothread();
1284 qemu_mutex_lock_ramlist();
1285 rcu_read_lock();
1286 bytes_transferred = 0;
1287 reset_ram_globals();
1289 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1290 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
1291 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1292 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1294 if (migrate_postcopy_ram()) {
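/* Postcopy starts with every page marked as not yet sent; bits are
 * cleared in ram_find_and_save_block() as pages go out, so whatever is
 * still set when postcopy starts is exactly the set of unsent pages. */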
1295 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1296 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1300 * Count the total number of pages used by ram blocks not including any
1301 * gaps due to alignment or unplugs.
1303 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1305 memory_global_dirty_log_start();
1306 migration_bitmap_sync();
1307 qemu_mutex_unlock_ramlist();
1308 qemu_mutex_unlock_iothread();
1310 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1312 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1313 qemu_put_byte(f, strlen(block->idstr));
1314 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1315 qemu_put_be64(f, block->used_length);
1318 rcu_read_unlock();
1320 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1321 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1323 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1325 return 0;
1328 static int ram_save_iterate(QEMUFile *f, void *opaque)
1330 int ret;
1331 int i;
1332 int64_t t0;
1333 int pages_sent = 0;
1335 rcu_read_lock();
1336 if (ram_list.version != last_version) {
1337 reset_ram_globals();
1340 /* Read version before ram_list.blocks */
1341 smp_rmb();
1343 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1345 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1346 i = 0;
1347 while ((ret = qemu_file_rate_limit(f)) == 0) {
1348 int pages;
1350 pages = ram_find_and_save_block(f, false, &bytes_transferred);
1351 /* no more pages to send */
1352 if (pages == 0) {
1353 break;
1355 pages_sent += pages;
1356 acct_info.iterations++;
1358 /* we want to check in the 1st loop, just in case it was the 1st time
1359 and we had to sync the dirty bitmap.
1360 qemu_clock_get_ns() is a bit expensive, so we only check every few
1361 iterations
1363 if ((i & 63) == 0) {
1364 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
1365 if (t1 > MAX_WAIT) {
1366 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
1367 t1, i);
1368 break;
1371 i++;
1373 flush_compressed_data(f);
1374 rcu_read_unlock();
1377 * Must occur before EOS (or any QEMUFile operation)
1378 * because of RDMA protocol.
1380 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
1382 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1383 bytes_transferred += 8;
1385 ret = qemu_file_get_error(f);
1386 if (ret < 0) {
1387 return ret;
1390 return pages_sent;
1393 /* Called with iothread lock */
1394 static int ram_save_complete(QEMUFile *f, void *opaque)
1396 rcu_read_lock();
1398 migration_bitmap_sync();
1400 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
1402 /* try transferring iterative blocks of memory */
1404 /* flush all remaining blocks regardless of rate limiting */
1405 while (true) {
1406 int pages;
1408 pages = ram_find_and_save_block(f, true, &bytes_transferred);
1410 /* no more blocks to send */
1410 if (pages == 0) {
1411 break;
1415 flush_compressed_data(f);
1416 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
1418 rcu_read_unlock();
1420 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1422 return 0;
1425 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
1426 uint64_t *non_postcopiable_pending,
1427 uint64_t *postcopiable_pending)
1429 uint64_t remaining_size;
1431 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
1433 if (remaining_size < max_size) {
1434 qemu_mutex_lock_iothread();
1435 rcu_read_lock();
1436 migration_bitmap_sync();
1437 rcu_read_unlock();
1438 qemu_mutex_unlock_iothread();
1439 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
1442 /* We can do postcopy, and all the data is postcopiable */
1443 *postcopiable_pending += remaining_size;
1446 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
1448 unsigned int xh_len;
1449 int xh_flags;
1451 if (!xbzrle_decoded_buf) {
1452 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
1455 /* extract RLE header */
1456 xh_flags = qemu_get_byte(f);
1457 xh_len = qemu_get_be16(f);
1459 if (xh_flags != ENCODING_FLAG_XBZRLE) {
1460 error_report("Failed to load XBZRLE page - wrong compression!");
1461 return -1;
1464 if (xh_len > TARGET_PAGE_SIZE) {
1465 error_report("Failed to load XBZRLE page - len overflow!");
1466 return -1;
1468 /* load data and decode */
1469 qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
1471 /* decode RLE */
1472 if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
1473 TARGET_PAGE_SIZE) == -1) {
1474 error_report("Failed to load XBZRLE page - decode error!");
1475 return -1;
1478 return 0;
1481 /* Must be called from within a rcu critical section.
1482 * Returns a pointer from within the RCU-protected ram_list.
1484 static inline void *host_from_stream_offset(QEMUFile *f,
1485 ram_addr_t offset,
1486 int flags)
1488 static RAMBlock *block = NULL;
1489 char id[256];
1490 uint8_t len;
1492 if (flags & RAM_SAVE_FLAG_CONTINUE) {
1493 if (!block || block->max_length <= offset) {
1494 error_report("Ack, bad migration stream!");
1495 return NULL;
1498 return block->host + offset;
1501 len = qemu_get_byte(f);
1502 qemu_get_buffer(f, (uint8_t *)id, len);
1503 id[len] = 0;
1505 block = qemu_ram_block_by_name(id);
1506 if (block && block->max_length > offset) {
1507 return block->host + offset;
1510 error_report("Can't find block %s", id);
1511 return NULL;
1515 * If a page (or a whole RDMA chunk) has been
1516 * determined to be zero, then zap it.
1518 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
1520 if (ch != 0 || !is_zero_range(host, size)) {
1521 memset(host, ch, size);
1525 static void *do_data_decompress(void *opaque)
1527 DecompressParam *param = opaque;
1528 unsigned long pagesize;
1530 while (!quit_decomp_thread) {
1531 qemu_mutex_lock(&param->mutex);
1532 while (!param->start && !quit_decomp_thread) {
1533 qemu_cond_wait(&param->cond, &param->mutex);
1534 pagesize = TARGET_PAGE_SIZE;
1535 if (!quit_decomp_thread) {
1536 /* uncompress() can fail in some cases, especially
1537 * when the page is dirtied while it is being compressed; that's
1538 * not a problem, because the dirty page will be retransferred
1539 * and uncompress() won't break the data in other pages.
1541 uncompress((Bytef *)param->des, &pagesize,
1542 (const Bytef *)param->compbuf, param->len);
1544 param->start = false;
1546 qemu_mutex_unlock(&param->mutex);
1549 return NULL;
1552 void migrate_decompress_threads_create(void)
1554 int i, thread_count;
1556 thread_count = migrate_decompress_threads();
1557 decompress_threads = g_new0(QemuThread, thread_count);
1558 decomp_param = g_new0(DecompressParam, thread_count);
1559 compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
1560 quit_decomp_thread = false;
1561 for (i = 0; i < thread_count; i++) {
1562 qemu_mutex_init(&decomp_param[i].mutex);
1563 qemu_cond_init(&decomp_param[i].cond);
1564 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
1565 qemu_thread_create(decompress_threads + i, "decompress",
1566 do_data_decompress, decomp_param + i,
1567 QEMU_THREAD_JOINABLE);
1571 void migrate_decompress_threads_join(void)
1573 int i, thread_count;
1575 quit_decomp_thread = true;
1576 thread_count = migrate_decompress_threads();
1577 for (i = 0; i < thread_count; i++) {
1578 qemu_mutex_lock(&decomp_param[i].mutex);
1579 qemu_cond_signal(&decomp_param[i].cond);
1580 qemu_mutex_unlock(&decomp_param[i].mutex);
1582 for (i = 0; i < thread_count; i++) {
1583 qemu_thread_join(decompress_threads + i);
1584 qemu_mutex_destroy(&decomp_param[i].mutex);
1585 qemu_cond_destroy(&decomp_param[i].cond);
1586 g_free(decomp_param[i].compbuf);
1588 g_free(decompress_threads);
1589 g_free(decomp_param);
1590 g_free(compressed_data_buf);
1591 decompress_threads = NULL;
1592 decomp_param = NULL;
1593 compressed_data_buf = NULL;
1596 static void decompress_data_with_multi_threads(uint8_t *compbuf,
1597 void *host, int len)
1599 int idx, thread_count;
1601 thread_count = migrate_decompress_threads();
1602 while (true) {
1603 for (idx = 0; idx < thread_count; idx++) {
1604 if (!decomp_param[idx].start) {
1605 memcpy(decomp_param[idx].compbuf, compbuf, len);
1606 decomp_param[idx].des = host;
1607 decomp_param[idx].len = len;
1608 start_decompression(&decomp_param[idx]);
1609 break;
1612 if (idx < thread_count) {
1613 break;
1618 static int ram_load(QEMUFile *f, void *opaque, int version_id)
1620 int flags = 0, ret = 0;
1621 static uint64_t seq_iter;
1622 int len = 0;
1624 seq_iter++;
1626 if (version_id != 4) {
1627 ret = -EINVAL;
1630 /* This RCU critical section can be very long running.
1631 * When RCU reclaims in the code start to become numerous,
1632 * it will be necessary to reduce the granularity of this
1633 * critical section.
1635 rcu_read_lock();
1636 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
1637 ram_addr_t addr, total_ram_bytes;
1638 void *host = NULL;
1639 uint8_t ch;
1641 addr = qemu_get_be64(f);
1642 flags = addr & ~TARGET_PAGE_MASK;
1643 addr &= TARGET_PAGE_MASK;
1645 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
1646 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
1647 host = host_from_stream_offset(f, addr, flags);
1648 if (!host) {
1649 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
1650 ret = -EINVAL;
1651 break;
1655 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
1656 case RAM_SAVE_FLAG_MEM_SIZE:
1657 /* Synchronize RAM block list */
1658 total_ram_bytes = addr;
1659 while (!ret && total_ram_bytes) {
1660 RAMBlock *block;
1661 char id[256];
1662 ram_addr_t length;
1664 len = qemu_get_byte(f);
1665 qemu_get_buffer(f, (uint8_t *)id, len);
1666 id[len] = 0;
1667 length = qemu_get_be64(f);
1669 block = qemu_ram_block_by_name(id);
1670 if (block) {
1671 if (length != block->used_length) {
1672 Error *local_err = NULL;
1674 ret = qemu_ram_resize(block->offset, length,
1675 &local_err);
1676 if (local_err) {
1677 error_report_err(local_err);
1680 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
1681 block->idstr);
1682 } else {
1683 error_report("Unknown ramblock \"%s\", cannot "
1684 "accept migration", id);
1685 ret = -EINVAL;
1688 total_ram_bytes -= length;
1690 break;
1692 case RAM_SAVE_FLAG_COMPRESS:
1693 ch = qemu_get_byte(f);
1694 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
1695 break;
1697 case RAM_SAVE_FLAG_PAGE:
1698 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
1699 break;
1701 case RAM_SAVE_FLAG_COMPRESS_PAGE:
1702 len = qemu_get_be32(f);
1703 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
1704 error_report("Invalid compressed data length: %d", len);
1705 ret = -EINVAL;
1706 break;
1708 qemu_get_buffer(f, compressed_data_buf, len);
1709 decompress_data_with_multi_threads(compressed_data_buf, host, len);
1710 break;
1712 case RAM_SAVE_FLAG_XBZRLE:
1713 if (load_xbzrle(f, addr, host) < 0) {
1714 error_report("Failed to decompress XBZRLE page at "
1715 RAM_ADDR_FMT, addr);
1716 ret = -EINVAL;
1717 break;
1719 break;
1720 case RAM_SAVE_FLAG_EOS:
1721 /* normal exit */
1722 break;
1723 default:
1724 if (flags & RAM_SAVE_FLAG_HOOK) {
1725 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
1726 } else {
1727 error_report("Unknown combination of migration flags: %#x",
1728 flags);
1729 ret = -EINVAL;
1732 if (!ret) {
1733 ret = qemu_file_get_error(f);
1737 rcu_read_unlock();
1738 DPRINTF("Completed load of VM with exit code %d seq iteration "
1739 "%" PRIu64 "\n", ret, seq_iter);
1740 return ret;
1743 static SaveVMHandlers savevm_ram_handlers = {
1744 .save_live_setup = ram_save_setup,
1745 .save_live_iterate = ram_save_iterate,
1746 .save_live_complete_postcopy = ram_save_complete,
1747 .save_live_complete_precopy = ram_save_complete,
1748 .save_live_pending = ram_save_pending,
1749 .load_state = ram_load,
1750 .cleanup = ram_migration_cleanup,
1753 void ram_mig_init(void)
1755 qemu_mutex_init(&XBZRLE.lock);
1756 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);