migration: refine the decompression code
[qemu/ar7.git] / migration / ram.c
blob a44b4f00913dc0a453fde5320a91215d133de1f4
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
38 #include "migration/migration.h"
39 #include "migration/postcopy-ram.h"
40 #include "exec/address-spaces.h"
41 #include "migration/page_cache.h"
42 #include "qemu/error-report.h"
43 #include "trace.h"
44 #include "exec/ram_addr.h"
45 #include "qemu/rcu_queue.h"
47 #ifdef DEBUG_MIGRATION_RAM
48 #define DPRINTF(fmt, ...) \
49 do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
50 #else
51 #define DPRINTF(fmt, ...) \
52 do { } while (0)
53 #endif
55 static int dirty_rate_high_cnt;
57 static uint64_t bitmap_sync_count;
59 /***********************************************************/
60 /* ram save/restore */
62 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
63 #define RAM_SAVE_FLAG_COMPRESS 0x02
64 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
65 #define RAM_SAVE_FLAG_PAGE 0x08
66 #define RAM_SAVE_FLAG_EOS 0x10
67 #define RAM_SAVE_FLAG_CONTINUE 0x20
68 #define RAM_SAVE_FLAG_XBZRLE 0x40
69 /* 0x80 is reserved in migration.h; start with 0x100 next */
70 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
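/*
 * Editor's note (illustrative sketch, not part of the original file): the
 * RAM_SAVE_FLAG_* values above are OR-ed into the low bits of the 64-bit
 * page-offset word written by save_page_header(), which works because page
 * offsets are target-page aligned.  The receive side can split such a word
 * back up along these lines:
 *
 *     uint64_t header = qemu_get_be64(f);
 *     ram_addr_t addr = header & TARGET_PAGE_MASK;   // page offset
 *     int flags       = header & ~TARGET_PAGE_MASK;  // RAM_SAVE_FLAG_*
 */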
72 static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
74 static inline bool is_zero_range(uint8_t *p, uint64_t size)
76 return buffer_find_nonzero_offset(p, size) == size;
79 /* struct contains XBZRLE cache and a static page
80 used by the compression */
81 static struct {
82 /* buffer used for XBZRLE encoding */
83 uint8_t *encoded_buf;
84 /* buffer for storing page content */
85 uint8_t *current_buf;
86 /* Cache for XBZRLE, Protected by lock. */
87 PageCache *cache;
88 QemuMutex lock;
89 } XBZRLE;
91 /* buffer used for XBZRLE decoding */
92 static uint8_t *xbzrle_decoded_buf;
94 static void XBZRLE_cache_lock(void)
96 if (migrate_use_xbzrle())
97 qemu_mutex_lock(&XBZRLE.lock);
100 static void XBZRLE_cache_unlock(void)
102 if (migrate_use_xbzrle())
103 qemu_mutex_unlock(&XBZRLE.lock);
107 * called from qmp_migrate_set_cache_size in main thread, possibly while
108 * a migration is in progress.
109 * A running migration may be using the cache and might finish during this
110 * call, hence changes to the cache are protected by XBZRLE.lock().
112 int64_t xbzrle_cache_resize(int64_t new_size)
114 PageCache *new_cache;
115 int64_t ret;
117 if (new_size < TARGET_PAGE_SIZE) {
118 return -1;
121 XBZRLE_cache_lock();
123 if (XBZRLE.cache != NULL) {
124 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
125 goto out_new_size;
127 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
128 TARGET_PAGE_SIZE);
129 if (!new_cache) {
130 error_report("Error creating cache");
131 ret = -1;
132 goto out;
135 cache_fini(XBZRLE.cache);
136 XBZRLE.cache = new_cache;
139 out_new_size:
140 ret = pow2floor(new_size);
141 out:
142 XBZRLE_cache_unlock();
143 return ret;
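/*
 * Editor's note (worked example, not part of the original file): the resize
 * reports the requested byte size rounded down to a power of two, so with
 * 4KiB target pages
 *
 *     xbzrle_cache_resize(5 * 1024 * 1024);   // ask for 5MiB
 *
 * returns pow2floor(5MiB) == 4MiB, i.e. room for 1024 cached pages, while
 * anything below TARGET_PAGE_SIZE is rejected with -1 before the lock is
 * taken.
 */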
146 /* accounting for migration statistics */
147 typedef struct AccountingInfo {
148 uint64_t dup_pages;
149 uint64_t skipped_pages;
150 uint64_t norm_pages;
151 uint64_t iterations;
152 uint64_t xbzrle_bytes;
153 uint64_t xbzrle_pages;
154 uint64_t xbzrle_cache_miss;
155 double xbzrle_cache_miss_rate;
156 uint64_t xbzrle_overflows;
157 } AccountingInfo;
159 static AccountingInfo acct_info;
161 static void acct_clear(void)
163 memset(&acct_info, 0, sizeof(acct_info));
166 uint64_t dup_mig_bytes_transferred(void)
168 return acct_info.dup_pages * TARGET_PAGE_SIZE;
171 uint64_t dup_mig_pages_transferred(void)
173 return acct_info.dup_pages;
176 uint64_t skipped_mig_bytes_transferred(void)
178 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
181 uint64_t skipped_mig_pages_transferred(void)
183 return acct_info.skipped_pages;
186 uint64_t norm_mig_bytes_transferred(void)
188 return acct_info.norm_pages * TARGET_PAGE_SIZE;
191 uint64_t norm_mig_pages_transferred(void)
193 return acct_info.norm_pages;
196 uint64_t xbzrle_mig_bytes_transferred(void)
198 return acct_info.xbzrle_bytes;
201 uint64_t xbzrle_mig_pages_transferred(void)
203 return acct_info.xbzrle_pages;
206 uint64_t xbzrle_mig_pages_cache_miss(void)
208 return acct_info.xbzrle_cache_miss;
211 double xbzrle_mig_cache_miss_rate(void)
213 return acct_info.xbzrle_cache_miss_rate;
216 uint64_t xbzrle_mig_pages_overflow(void)
218 return acct_info.xbzrle_overflows;
221 /* This is the last block that we have visited searching for dirty pages
223 static RAMBlock *last_seen_block;
224 /* This is the last block from where we have sent data */
225 static RAMBlock *last_sent_block;
226 static ram_addr_t last_offset;
227 static QemuMutex migration_bitmap_mutex;
228 static uint64_t migration_dirty_pages;
229 static uint32_t last_version;
230 static bool ram_bulk_stage;
232 /* used by the search for pages to send */
233 struct PageSearchStatus {
234 /* Current block being searched */
235 RAMBlock *block;
236 /* Current offset to search from */
237 ram_addr_t offset;
238 /* Set once we wrap around */
239 bool complete_round;
241 typedef struct PageSearchStatus PageSearchStatus;
243 static struct BitmapRcu {
244 struct rcu_head rcu;
245 /* Main migration bitmap */
246 unsigned long *bmap;
247 /* bitmap of pages that haven't been sent even once
248 * only maintained and used in postcopy at the moment
249 * where it's used to send the dirtymap at the start
250 * of the postcopy phase
252 unsigned long *unsentmap;
253 } *migration_bitmap_rcu;
255 struct CompressParam {
256 bool done;
257 bool quit;
258 QEMUFile *file;
259 QemuMutex mutex;
260 QemuCond cond;
261 RAMBlock *block;
262 ram_addr_t offset;
264 typedef struct CompressParam CompressParam;
266 struct DecompressParam {
267 bool done;
268 bool quit;
269 QemuMutex mutex;
270 QemuCond cond;
271 void *des;
272 uint8_t *compbuf;
273 int len;
275 typedef struct DecompressParam DecompressParam;
277 static CompressParam *comp_param;
278 static QemuThread *compress_threads;
279 /* comp_done_cond is used to wake up the migration thread when
280 * one of the compression threads has finished the compression.
281 * comp_done_lock is used together with comp_done_cond.
283 static QemuMutex *comp_done_lock;
284 static QemuCond *comp_done_cond;
285 /* The empty QEMUFileOps will be used by file in CompressParam */
286 static const QEMUFileOps empty_ops = { };
288 static bool compression_switch;
289 static DecompressParam *decomp_param;
290 static QemuThread *decompress_threads;
291 static QemuMutex decomp_done_lock;
292 static QemuCond decomp_done_cond;
294 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
295 ram_addr_t offset);
297 static void *do_data_compress(void *opaque)
299 CompressParam *param = opaque;
300 RAMBlock *block;
301 ram_addr_t offset;
303 qemu_mutex_lock(&param->mutex);
304 while (!param->quit) {
305 if (param->block) {
306 block = param->block;
307 offset = param->offset;
308 param->block = NULL;
309 qemu_mutex_unlock(&param->mutex);
311 do_compress_ram_page(param->file, block, offset);
313 qemu_mutex_lock(comp_done_lock);
314 param->done = true;
315 qemu_cond_signal(comp_done_cond);
316 qemu_mutex_unlock(comp_done_lock);
318 qemu_mutex_lock(&param->mutex);
319 } else {
320 qemu_cond_wait(&param->cond, &param->mutex);
323 qemu_mutex_unlock(&param->mutex);
325 return NULL;
328 static inline void terminate_compression_threads(void)
330 int idx, thread_count;
332 thread_count = migrate_compress_threads();
333 for (idx = 0; idx < thread_count; idx++) {
334 qemu_mutex_lock(&comp_param[idx].mutex);
335 comp_param[idx].quit = true;
336 qemu_cond_signal(&comp_param[idx].cond);
337 qemu_mutex_unlock(&comp_param[idx].mutex);
341 void migrate_compress_threads_join(void)
343 int i, thread_count;
345 if (!migrate_use_compression()) {
346 return;
348 terminate_compression_threads();
349 thread_count = migrate_compress_threads();
350 for (i = 0; i < thread_count; i++) {
351 qemu_thread_join(compress_threads + i);
352 qemu_fclose(comp_param[i].file);
353 qemu_mutex_destroy(&comp_param[i].mutex);
354 qemu_cond_destroy(&comp_param[i].cond);
356 qemu_mutex_destroy(comp_done_lock);
357 qemu_cond_destroy(comp_done_cond);
358 g_free(compress_threads);
359 g_free(comp_param);
360 g_free(comp_done_cond);
361 g_free(comp_done_lock);
362 compress_threads = NULL;
363 comp_param = NULL;
364 comp_done_cond = NULL;
365 comp_done_lock = NULL;
368 void migrate_compress_threads_create(void)
370 int i, thread_count;
372 if (!migrate_use_compression()) {
373 return;
375 compression_switch = true;
376 thread_count = migrate_compress_threads();
377 compress_threads = g_new0(QemuThread, thread_count);
378 comp_param = g_new0(CompressParam, thread_count);
379 comp_done_cond = g_new0(QemuCond, 1);
380 comp_done_lock = g_new0(QemuMutex, 1);
381 qemu_cond_init(comp_done_cond);
382 qemu_mutex_init(comp_done_lock);
383 for (i = 0; i < thread_count; i++) {
384 /* comp_param[i].file is just used as a dummy buffer to save data, set
385 * its ops to empty.
387 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
388 comp_param[i].done = true;
389 comp_param[i].quit = false;
390 qemu_mutex_init(&comp_param[i].mutex);
391 qemu_cond_init(&comp_param[i].cond);
392 qemu_thread_create(compress_threads + i, "compress",
393 do_data_compress, comp_param + i,
394 QEMU_THREAD_JOINABLE);
399 * save_page_header: Write page header to wire
401 * If this is the 1st block, it also writes the block identification
403 * Returns: Number of bytes written
405 * @f: QEMUFile where to send the data
406 * @block: block that contains the page we want to send
407 * @offset: offset inside the block for the page
408 * in the lower bits, it contains flags
410 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
412 size_t size, len;
414 qemu_put_be64(f, offset);
415 size = 8;
417 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
418 len = strlen(block->idstr);
419 qemu_put_byte(f, len);
420 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
421 size += 1 + len;
423 return size;
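/*
 * Editor's note (wire-format sketch, not part of the original file): for
 * the first page of a RAMBlock the header written above is
 *
 *     be64   offset | flags          8 bytes
 *     u8     strlen(block->idstr)    1 byte
 *     bytes  block->idstr            len bytes
 *
 * and for later pages of the same block RAM_SAVE_FLAG_CONTINUE is set and
 * only the 8-byte offset word is sent, hence the return value of either
 * 8 or 9 + len.
 */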
426 /* Reduce amount of guest cpu execution to hopefully slow down memory writes.
427 * If guest dirty memory rate is reduced below the rate at which we can
428 * transfer pages to the destination then we should be able to complete
429 * migration. Some workloads dirty memory way too fast and will not effectively
430 * converge, even with auto-converge.
432 static void mig_throttle_guest_down(void)
434 MigrationState *s = migrate_get_current();
435 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
436 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
438 /* We have not started throttling yet. Let's start it. */
439 if (!cpu_throttle_active()) {
440 cpu_throttle_set(pct_initial);
441 } else {
442 /* Throttling already on, just increase the rate */
443 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
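/*
 * Editor's note (illustrative, not part of the original file): assuming the
 * usual defaults of cpu_throttle_initial=20 and cpu_throttle_increment=10
 * (values not visible in this file), repeated calls throttle the guest at
 * 20% -> 30% -> 40% -> ... until the dirty rate drops below the transfer
 * rate or migration completes.
 */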
447 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
448 * The important thing is that a stale (not-yet-0'd) page be replaced
449 * by the new data.
450 * As a bonus, if the page wasn't in the cache it gets added so that
451 * when a small write is made into the 0'd page it gets XBZRLE sent
453 static void xbzrle_cache_zero_page(ram_addr_t current_addr)
455 if (ram_bulk_stage || !migrate_use_xbzrle()) {
456 return;
459 /* We don't care if this fails to allocate a new cache page
460 * as long as it updated an old one */
461 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
462 bitmap_sync_count);
465 #define ENCODING_FLAG_XBZRLE 0x1
468 * save_xbzrle_page: compress and send current page
470 * Returns: 1 means that we wrote the page
471 * 0 means that page is identical to the one already sent
472 * -1 means that xbzrle would be longer than normal
474 * @f: QEMUFile where to send the data
475 * @current_data:
476 * @current_addr:
477 * @block: block that contains the page we want to send
478 * @offset: offset inside the block for the page
479 * @last_stage: if we are at the completion stage
480 * @bytes_transferred: increase it with the number of transferred bytes
482 static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
483 ram_addr_t current_addr, RAMBlock *block,
484 ram_addr_t offset, bool last_stage,
485 uint64_t *bytes_transferred)
487 int encoded_len = 0, bytes_xbzrle;
488 uint8_t *prev_cached_page;
490 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
491 acct_info.xbzrle_cache_miss++;
492 if (!last_stage) {
493 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
494 bitmap_sync_count) == -1) {
495 return -1;
496 } else {
497 /* update *current_data when the page has been
498 inserted into cache */
499 *current_data = get_cached_data(XBZRLE.cache, current_addr);
502 return -1;
505 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
507 /* save current buffer into memory */
508 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
510 /* XBZRLE encoding (if there is no overflow) */
511 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
512 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
513 TARGET_PAGE_SIZE);
514 if (encoded_len == 0) {
515 DPRINTF("Skipping unmodified page\n");
516 return 0;
517 } else if (encoded_len == -1) {
518 DPRINTF("Overflow\n");
519 acct_info.xbzrle_overflows++;
520 /* update data in the cache */
521 if (!last_stage) {
522 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
523 *current_data = prev_cached_page;
525 return -1;
528 /* we need to update the data in the cache, in order to get the same data */
529 if (!last_stage) {
530 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
533 /* Send XBZRLE based compressed page */
534 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
535 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
536 qemu_put_be16(f, encoded_len);
537 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
538 bytes_xbzrle += encoded_len + 1 + 2;
539 acct_info.xbzrle_pages++;
540 acct_info.xbzrle_bytes += bytes_xbzrle;
541 *bytes_transferred += bytes_xbzrle;
543 return 1;
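/*
 * Editor's note (wire-format sketch, not part of the original file): an
 * XBZRLE page therefore costs
 *
 *     save_page_header(...)          8 bytes (or 9 + idstr len)
 *     u8    ENCODING_FLAG_XBZRLE     1 byte
 *     be16  encoded_len              2 bytes
 *     bytes encoded data             encoded_len bytes
 *
 * which is what the "encoded_len + 1 + 2" accounting above reflects; the
 * destination undoes this layout in load_xbzrle().
 */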
546 /* Called with rcu_read_lock() to protect migration_bitmap
547 * rb: The RAMBlock to search for dirty pages in
548 * start: Start address (typically so we can continue from previous page)
549 * ram_addr_abs: Pointer into which to store the address of the dirty page
550 * within the global ram_addr space
552 * Returns: byte offset within memory region of the start of a dirty page
554 static inline
555 ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
556 ram_addr_t start,
557 ram_addr_t *ram_addr_abs)
559 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
560 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
561 uint64_t rb_size = rb->used_length;
562 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
563 unsigned long *bitmap;
565 unsigned long next;
567 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
568 if (ram_bulk_stage && nr > base) {
569 next = nr + 1;
570 } else {
571 next = find_next_bit(bitmap, size, nr);
574 *ram_addr_abs = next << TARGET_PAGE_BITS;
575 return (next - base) << TARGET_PAGE_BITS;
578 static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
580 bool ret;
581 int nr = addr >> TARGET_PAGE_BITS;
582 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
584 ret = test_and_clear_bit(nr, bitmap);
586 if (ret) {
587 migration_dirty_pages--;
589 return ret;
592 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
594 unsigned long *bitmap;
595 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
596 migration_dirty_pages +=
597 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
600 /* Fix me: there are too many global variables used in the migration process. */
601 static int64_t start_time;
602 static int64_t bytes_xfer_prev;
603 static int64_t num_dirty_pages_period;
604 static uint64_t xbzrle_cache_miss_prev;
605 static uint64_t iterations_prev;
607 static void migration_bitmap_sync_init(void)
609 start_time = 0;
610 bytes_xfer_prev = 0;
611 num_dirty_pages_period = 0;
612 xbzrle_cache_miss_prev = 0;
613 iterations_prev = 0;
616 static void migration_bitmap_sync(void)
618 RAMBlock *block;
619 uint64_t num_dirty_pages_init = migration_dirty_pages;
620 MigrationState *s = migrate_get_current();
621 int64_t end_time;
622 int64_t bytes_xfer_now;
624 bitmap_sync_count++;
626 if (!bytes_xfer_prev) {
627 bytes_xfer_prev = ram_bytes_transferred();
630 if (!start_time) {
631 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
634 trace_migration_bitmap_sync_start();
635 address_space_sync_dirty_bitmap(&address_space_memory);
637 qemu_mutex_lock(&migration_bitmap_mutex);
638 rcu_read_lock();
639 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
640 migration_bitmap_sync_range(block->offset, block->used_length);
642 rcu_read_unlock();
643 qemu_mutex_unlock(&migration_bitmap_mutex);
645 trace_migration_bitmap_sync_end(migration_dirty_pages
646 - num_dirty_pages_init);
647 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
648 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
650 /* more than 1 second = 1000 milliseconds */
651 if (end_time > start_time + 1000) {
652 if (migrate_auto_converge()) {
653 /* The following detection logic can be refined later. For now:
654 Check to see if the dirtied bytes is 50% more than the approx.
655 amount of bytes that just got transferred since the last time we
656 were in this routine. If that happens twice, start or increase
657 throttling */
658 bytes_xfer_now = ram_bytes_transferred();
660 if (s->dirty_pages_rate &&
661 (num_dirty_pages_period * TARGET_PAGE_SIZE >
662 (bytes_xfer_now - bytes_xfer_prev)/2) &&
663 (dirty_rate_high_cnt++ >= 2)) {
664 trace_migration_throttle();
665 dirty_rate_high_cnt = 0;
666 mig_throttle_guest_down();
668 bytes_xfer_prev = bytes_xfer_now;
671 if (migrate_use_xbzrle()) {
672 if (iterations_prev != acct_info.iterations) {
673 acct_info.xbzrle_cache_miss_rate =
674 (double)(acct_info.xbzrle_cache_miss -
675 xbzrle_cache_miss_prev) /
676 (acct_info.iterations - iterations_prev);
678 iterations_prev = acct_info.iterations;
679 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
681 s->dirty_pages_rate = num_dirty_pages_period * 1000
682 / (end_time - start_time);
683 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
684 start_time = end_time;
685 num_dirty_pages_period = 0;
687 s->dirty_sync_count = bitmap_sync_count;
688 if (migrate_use_events()) {
689 qapi_event_send_migration_pass(bitmap_sync_count, NULL);
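/*
 * Editor's note (worked example, not part of the original file): with
 * auto-converge enabled, the check above fires when the bytes dirtied in
 * the last ~1s period exceed half of the bytes transferred in it.  E.g.
 * with 4KiB pages, 100000 newly dirtied pages (~410MB) against 600MB
 * transferred gives 410MB > 300MB, so dirty_rate_high_cnt is bumped; once
 * that counter trips the "++ >= 2" test the guest is throttled via
 * mig_throttle_guest_down() and the counter resets.
 */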
694 * save_zero_page: Send the zero page to the stream
696 * Returns: Number of pages written.
698 * @f: QEMUFile where to send the data
699 * @block: block that contains the page we want to send
700 * @offset: offset inside the block for the page
701 * @p: pointer to the page
702 * @bytes_transferred: increase it with the number of transferred bytes
704 static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
705 uint8_t *p, uint64_t *bytes_transferred)
707 int pages = -1;
709 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
710 acct_info.dup_pages++;
711 *bytes_transferred += save_page_header(f, block,
712 offset | RAM_SAVE_FLAG_COMPRESS);
713 qemu_put_byte(f, 0);
714 *bytes_transferred += 1;
715 pages = 1;
718 return pages;
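/*
 * Editor's note (illustrative, not part of the original file): a zero page
 * is sent as its page header with RAM_SAVE_FLAG_COMPRESS in the low bits
 * plus a single literal 0 byte, i.e. roughly 9 bytes on the wire (plus the
 * block name for the first page of a block) instead of a full
 * TARGET_PAGE_SIZE payload.
 */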
722 * ram_save_page: Send the given page to the stream
724 * Returns: Number of pages written.
725 * < 0 - error
726 * >=0 - Number of pages written - this might legally be 0
727 * if xbzrle noticed the page was the same.
729 * @f: QEMUFile where to send the data
730 * @block: block that contains the page we want to send
731 * @offset: offset inside the block for the page
732 * @last_stage: if we are at the completion stage
733 * @bytes_transferred: increase it with the number of transferred bytes
735 static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
736 bool last_stage, uint64_t *bytes_transferred)
738 int pages = -1;
739 uint64_t bytes_xmit;
740 ram_addr_t current_addr;
741 uint8_t *p;
742 int ret;
743 bool send_async = true;
744 RAMBlock *block = pss->block;
745 ram_addr_t offset = pss->offset;
747 p = block->host + offset;
749 /* If in doubt, send the page as normal */
750 bytes_xmit = 0;
751 ret = ram_control_save_page(f, block->offset,
752 offset, TARGET_PAGE_SIZE, &bytes_xmit);
753 if (bytes_xmit) {
754 *bytes_transferred += bytes_xmit;
755 pages = 1;
758 XBZRLE_cache_lock();
760 current_addr = block->offset + offset;
762 if (block == last_sent_block) {
763 offset |= RAM_SAVE_FLAG_CONTINUE;
765 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
766 if (ret != RAM_SAVE_CONTROL_DELAYED) {
767 if (bytes_xmit > 0) {
768 acct_info.norm_pages++;
769 } else if (bytes_xmit == 0) {
770 acct_info.dup_pages++;
773 } else {
774 pages = save_zero_page(f, block, offset, p, bytes_transferred);
775 if (pages > 0) {
776 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
777 * page would be stale
779 xbzrle_cache_zero_page(current_addr);
780 } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
781 pages = save_xbzrle_page(f, &p, current_addr, block,
782 offset, last_stage, bytes_transferred);
783 if (!last_stage) {
784 /* Can't send this cached data async, since the cache page
785 * might get updated before it gets to the wire
787 send_async = false;
792 /* XBZRLE overflow or normal page */
793 if (pages == -1) {
794 *bytes_transferred += save_page_header(f, block,
795 offset | RAM_SAVE_FLAG_PAGE);
796 if (send_async) {
797 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
798 } else {
799 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
801 *bytes_transferred += TARGET_PAGE_SIZE;
802 pages = 1;
803 acct_info.norm_pages++;
806 XBZRLE_cache_unlock();
808 return pages;
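/*
 * Editor's note (illustrative, not part of the original file): the decision
 * order above is: let ram_control_save_page() (e.g. RDMA) take the page if
 * it can, otherwise try save_zero_page(), then save_xbzrle_page() once the
 * bulk stage is over and XBZRLE is enabled, and finally fall back to a raw
 * RAM_SAVE_FLAG_PAGE copy of TARGET_PAGE_SIZE bytes, sent asynchronously
 * unless the buffer is still shared with the XBZRLE cache.
 */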
811 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
812 ram_addr_t offset)
814 int bytes_sent, blen;
815 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
817 bytes_sent = save_page_header(f, block, offset |
818 RAM_SAVE_FLAG_COMPRESS_PAGE);
819 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
820 migrate_compress_level());
821 if (blen < 0) {
822 bytes_sent = 0;
823 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
824 error_report("compressed data failed!");
825 } else {
826 bytes_sent += blen;
829 return bytes_sent;
832 static uint64_t bytes_transferred;
834 static void flush_compressed_data(QEMUFile *f)
836 int idx, len, thread_count;
838 if (!migrate_use_compression()) {
839 return;
841 thread_count = migrate_compress_threads();
843 qemu_mutex_lock(comp_done_lock);
844 for (idx = 0; idx < thread_count; idx++) {
845 while (!comp_param[idx].done) {
846 qemu_cond_wait(comp_done_cond, comp_done_lock);
849 qemu_mutex_unlock(comp_done_lock);
851 for (idx = 0; idx < thread_count; idx++) {
852 qemu_mutex_lock(&comp_param[idx].mutex);
853 if (!comp_param[idx].quit) {
854 len = qemu_put_qemu_file(f, comp_param[idx].file);
855 bytes_transferred += len;
857 qemu_mutex_unlock(&comp_param[idx].mutex);
861 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
862 ram_addr_t offset)
864 param->block = block;
865 param->offset = offset;
868 static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
869 ram_addr_t offset,
870 uint64_t *bytes_transferred)
872 int idx, thread_count, bytes_xmit = -1, pages = -1;
874 thread_count = migrate_compress_threads();
875 qemu_mutex_lock(comp_done_lock);
876 while (true) {
877 for (idx = 0; idx < thread_count; idx++) {
878 if (comp_param[idx].done) {
879 comp_param[idx].done = false;
880 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
881 qemu_mutex_lock(&comp_param[idx].mutex);
882 set_compress_params(&comp_param[idx], block, offset);
883 qemu_cond_signal(&comp_param[idx].cond);
884 qemu_mutex_unlock(&comp_param[idx].mutex);
885 pages = 1;
886 acct_info.norm_pages++;
887 *bytes_transferred += bytes_xmit;
888 break;
891 if (pages > 0) {
892 break;
893 } else {
894 qemu_cond_wait(comp_done_cond, comp_done_lock);
897 qemu_mutex_unlock(comp_done_lock);
899 return pages;
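/*
 * Editor's note (illustrative, not part of the original file): the hand-off
 * between the migration thread and a compression worker is roughly:
 *
 *   1. the migration thread finds comp_param[idx].done == true (under
 *      comp_done_lock), clears it and drains the worker's buffer file into
 *      the real stream with qemu_put_qemu_file();
 *   2. it publishes block/offset under comp_param[idx].mutex and signals
 *      comp_param[idx].cond;
 *   3. do_data_compress() wakes, compresses the page into param->file,
 *      sets done = true under comp_done_lock and signals comp_done_cond;
 *   4. if no worker is free, the migration thread sleeps on comp_done_cond
 *      until step 3 happens on some thread.
 */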
903 * ram_save_compressed_page: compress the given page and send it to the stream
905 * Returns: Number of pages written.
907 * @f: QEMUFile where to send the data
908 * @block: block that contains the page we want to send
909 * @offset: offset inside the block for the page
910 * @last_stage: if we are at the completion stage
911 * @bytes_transferred: increase it with the number of transferred bytes
913 static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
914 bool last_stage,
915 uint64_t *bytes_transferred)
917 int pages = -1;
918 uint64_t bytes_xmit = 0;
919 uint8_t *p;
920 int ret, blen;
921 RAMBlock *block = pss->block;
922 ram_addr_t offset = pss->offset;
924 p = block->host + offset;
926 ret = ram_control_save_page(f, block->offset,
927 offset, TARGET_PAGE_SIZE, &bytes_xmit);
928 if (bytes_xmit) {
929 *bytes_transferred += bytes_xmit;
930 pages = 1;
932 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
933 if (ret != RAM_SAVE_CONTROL_DELAYED) {
934 if (bytes_xmit > 0) {
935 acct_info.norm_pages++;
936 } else if (bytes_xmit == 0) {
937 acct_info.dup_pages++;
940 } else {
941 /* When starting the process of a new block, the first page of
942 * the block should be sent out before other pages in the same
943 * block, and all the pages in last block should have been sent
944 * out, keeping this order is important, because the 'cont' flag
945 * is used to avoid resending the block name.
947 if (block != last_sent_block) {
948 flush_compressed_data(f);
949 pages = save_zero_page(f, block, offset, p, bytes_transferred);
950 if (pages == -1) {
951 /* Make sure the first page is sent out before other pages */
952 bytes_xmit = save_page_header(f, block, offset |
953 RAM_SAVE_FLAG_COMPRESS_PAGE);
954 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
955 migrate_compress_level());
956 if (blen > 0) {
957 *bytes_transferred += bytes_xmit + blen;
958 acct_info.norm_pages++;
959 pages = 1;
960 } else {
961 qemu_file_set_error(f, blen);
962 error_report("compressed data failed!");
965 } else {
966 offset |= RAM_SAVE_FLAG_CONTINUE;
967 pages = save_zero_page(f, block, offset, p, bytes_transferred);
968 if (pages == -1) {
969 pages = compress_page_with_multi_thread(f, block, offset,
970 bytes_transferred);
975 return pages;
979 * Find the next dirty page and update any state associated with
980 * the search process.
982 * Returns: True if a page is found
984 * @f: Current migration stream.
985 * @pss: Data about the state of the current dirty page scan.
986 * @*again: Set to false if the search has scanned the whole of RAM
987 * *ram_addr_abs: Pointer into which to store the address of the dirty page
988 * within the global ram_addr space
990 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
991 bool *again, ram_addr_t *ram_addr_abs)
993 pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
994 ram_addr_abs);
995 if (pss->complete_round && pss->block == last_seen_block &&
996 pss->offset >= last_offset) {
998 * We've been once around the RAM and haven't found anything.
999 * Give up.
1001 *again = false;
1002 return false;
1004 if (pss->offset >= pss->block->used_length) {
1005 /* Didn't find anything in this RAM Block */
1006 pss->offset = 0;
1007 pss->block = QLIST_NEXT_RCU(pss->block, next);
1008 if (!pss->block) {
1009 /* Hit the end of the list */
1010 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1011 /* Flag that we've looped */
1012 pss->complete_round = true;
1013 ram_bulk_stage = false;
1014 if (migrate_use_xbzrle()) {
1015 /* If xbzrle is on, stop using the data compression at this
1016 * point. In theory, xbzrle can do better than compression.
1018 flush_compressed_data(f);
1019 compression_switch = false;
1022 /* Didn't find anything this time, but try again on the new block */
1023 *again = true;
1024 return false;
1025 } else {
1026 /* Can go around again, but... */
1027 *again = true;
1028 /* We've found something so probably don't need to */
1029 return true;
1034 * Helper for 'get_queued_page' - gets a page off the queue
1035 * ms: MigrationState in
1036 * *offset: Used to return the offset within the RAMBlock
1037 * ram_addr_abs: global offset in the dirty/sent bitmaps
1039 * Returns: block (or NULL if none available)
1041 static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1042 ram_addr_t *ram_addr_abs)
1044 RAMBlock *block = NULL;
1046 qemu_mutex_lock(&ms->src_page_req_mutex);
1047 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1048 struct MigrationSrcPageRequest *entry =
1049 QSIMPLEQ_FIRST(&ms->src_page_requests);
1050 block = entry->rb;
1051 *offset = entry->offset;
1052 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1053 TARGET_PAGE_MASK;
1055 if (entry->len > TARGET_PAGE_SIZE) {
1056 entry->len -= TARGET_PAGE_SIZE;
1057 entry->offset += TARGET_PAGE_SIZE;
1058 } else {
1059 memory_region_unref(block->mr);
1060 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1061 g_free(entry);
1064 qemu_mutex_unlock(&ms->src_page_req_mutex);
1066 return block;
1070 * Unqueue a page from the queue fed by postcopy page requests; skips pages
1071 * that are already sent (!dirty)
1073 * ms: MigrationState in
1074 * pss: PageSearchStatus structure updated with found block/offset
1075 * ram_addr_abs: global offset in the dirty/sent bitmaps
1077 * Returns: true if a queued page is found
1079 static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1080 ram_addr_t *ram_addr_abs)
1082 RAMBlock *block;
1083 ram_addr_t offset;
1084 bool dirty;
1086 do {
1087 block = unqueue_page(ms, &offset, ram_addr_abs);
1089 * We're sending this page, and since it's postcopy nothing else
1090 * will dirty it, and we must make sure it doesn't get sent again
1091 * even if this queue request was received after the background
1092 * search already sent it.
1094 if (block) {
1095 unsigned long *bitmap;
1096 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1097 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1098 if (!dirty) {
1099 trace_get_queued_page_not_dirty(
1100 block->idstr, (uint64_t)offset,
1101 (uint64_t)*ram_addr_abs,
1102 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1103 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1104 } else {
1105 trace_get_queued_page(block->idstr,
1106 (uint64_t)offset,
1107 (uint64_t)*ram_addr_abs);
1111 } while (block && !dirty);
1113 if (block) {
1115 * As soon as we start servicing pages out of order, then we have
1116 * to kill the bulk stage, since the bulk stage assumes
1117 * in (migration_bitmap_find_and_reset_dirty) that every page is
1118 * dirty, that's no longer true.
1120 ram_bulk_stage = false;
1123 * We want the background search to continue from the queued page
1124 * since the guest is likely to want other pages near to the page
1125 * it just requested.
1127 pss->block = block;
1128 pss->offset = offset;
1131 return !!block;
1135 * flush_page_queue: Flush any remaining pages in the ram request queue
1136 * it should be empty at the end anyway, but in error cases there may be
1137 * some left.
1139 * ms: MigrationState
1141 void flush_page_queue(MigrationState *ms)
1143 struct MigrationSrcPageRequest *mspr, *next_mspr;
1144 /* This queue generally should be empty - but in the case of a failed
1145 * migration might have some droppings in.
1147 rcu_read_lock();
1148 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1149 memory_region_unref(mspr->rb->mr);
1150 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1151 g_free(mspr);
1153 rcu_read_unlock();
1157 * Queue the pages for transmission, e.g. a request from postcopy destination
1158 * ms: MigrationStatus in which the queue is held
1159 * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1160 * start: Offset from the start of the RAMBlock
1161 * len: Length (in bytes) to send
1162 * Return: 0 on success
1164 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1165 ram_addr_t start, ram_addr_t len)
1167 RAMBlock *ramblock;
1169 ms->postcopy_requests++;
1170 rcu_read_lock();
1171 if (!rbname) {
1172 /* Reuse last RAMBlock */
1173 ramblock = ms->last_req_rb;
1175 if (!ramblock) {
1177 * Shouldn't happen, we can't reuse the last RAMBlock if
1178 * it's the 1st request.
1180 error_report("ram_save_queue_pages no previous block");
1181 goto err;
1183 } else {
1184 ramblock = qemu_ram_block_by_name(rbname);
1186 if (!ramblock) {
1187 /* We shouldn't be asked for a non-existent RAMBlock */
1188 error_report("ram_save_queue_pages no block '%s'", rbname);
1189 goto err;
1191 ms->last_req_rb = ramblock;
1193 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1194 if (start+len > ramblock->used_length) {
1195 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1196 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1197 __func__, start, len, ramblock->used_length);
1198 goto err;
1201 struct MigrationSrcPageRequest *new_entry =
1202 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1203 new_entry->rb = ramblock;
1204 new_entry->offset = start;
1205 new_entry->len = len;
1207 memory_region_ref(ramblock->mr);
1208 qemu_mutex_lock(&ms->src_page_req_mutex);
1209 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1210 qemu_mutex_unlock(&ms->src_page_req_mutex);
1211 rcu_read_unlock();
1213 return 0;
1215 err:
1216 rcu_read_unlock();
1217 return -1;
1221 * ram_save_target_page: Save one target page
1224 * @f: QEMUFile where to send the data
1225 * @block: pointer to block that contains the page we want to send
1226 * @offset: offset inside the block for the page;
1227 * @last_stage: if we are at the completion stage
1228 * @bytes_transferred: increase it with the number of transferred bytes
1229 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1231 * Returns: Number of pages written.
1233 static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
1234 PageSearchStatus *pss,
1235 bool last_stage,
1236 uint64_t *bytes_transferred,
1237 ram_addr_t dirty_ram_abs)
1239 int res = 0;
1241 /* Check if the page is dirty and, if it is, send it */
1242 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1243 unsigned long *unsentmap;
1244 if (compression_switch && migrate_use_compression()) {
1245 res = ram_save_compressed_page(f, pss,
1246 last_stage,
1247 bytes_transferred);
1248 } else {
1249 res = ram_save_page(f, pss, last_stage,
1250 bytes_transferred);
1253 if (res < 0) {
1254 return res;
1256 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1257 if (unsentmap) {
1258 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1260 /* Only update last_sent_block if a block was actually sent; xbzrle
1261 * might have decided the page was identical so didn't bother writing
1262 * to the stream.
1264 if (res > 0) {
1265 last_sent_block = pss->block;
1269 return res;
1273 * ram_save_host_page: Starting at *offset send pages up to the end
1274 * of the current host page. It's valid for the initial
1275 * offset to point into the middle of a host page
1276 * in which case the remainder of the hostpage is sent.
1277 * Only dirty target pages are sent.
1279 * Returns: Number of pages written.
1281 * @f: QEMUFile where to send the data
1282 * @block: pointer to block that contains the page we want to send
1283 * @offset: offset inside the block for the page; updated to last target page
1284 * sent
1285 * @last_stage: if we are at the completion stage
1286 * @bytes_transferred: increase it with the number of transferred bytes
1287 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1289 static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
1290 PageSearchStatus *pss,
1291 bool last_stage,
1292 uint64_t *bytes_transferred,
1293 ram_addr_t dirty_ram_abs)
1295 int tmppages, pages = 0;
1296 do {
1297 tmppages = ram_save_target_page(ms, f, pss, last_stage,
1298 bytes_transferred, dirty_ram_abs);
1299 if (tmppages < 0) {
1300 return tmppages;
1303 pages += tmppages;
1304 pss->offset += TARGET_PAGE_SIZE;
1305 dirty_ram_abs += TARGET_PAGE_SIZE;
1306 } while (pss->offset & (qemu_host_page_size - 1));
1308 /* The offset we leave with is the last one we looked at */
1309 pss->offset -= TARGET_PAGE_SIZE;
1310 return pages;
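/*
 * Editor's note (worked example, not part of the original file): on a host
 * with 64KiB pages and 4KiB target pages, a call entering at target page 5
 * of a host page walks target pages 5 through 15 (ram_save_target_page()
 * skips any that are no longer dirty) and leaves pss->offset on page 15,
 * the last one looked at; with 4KiB host pages the loop body runs exactly
 * once.
 */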
1314 * ram_find_and_save_block: Finds a dirty page and sends it to f
1316 * Called within an RCU critical section.
1318 * Returns: The number of pages written
1319 * 0 means no dirty pages
1321 * @f: QEMUFile where to send the data
1322 * @last_stage: if we are at the completion stage
1323 * @bytes_transferred: increase it with the number of transferred bytes
1325 * On systems where host-page-size > target-page-size it will send all the
1326 * pages in a host page that are dirty.
1329 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1330 uint64_t *bytes_transferred)
1332 PageSearchStatus pss;
1333 MigrationState *ms = migrate_get_current();
1334 int pages = 0;
1335 bool again, found;
1336 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1337 ram_addr_t space */
1339 pss.block = last_seen_block;
1340 pss.offset = last_offset;
1341 pss.complete_round = false;
1343 if (!pss.block) {
1344 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1347 do {
1348 again = true;
1349 found = get_queued_page(ms, &pss, &dirty_ram_abs);
1351 if (!found) {
1352 /* priority queue empty, so just search for something dirty */
1353 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1356 if (found) {
1357 pages = ram_save_host_page(ms, f, &pss,
1358 last_stage, bytes_transferred,
1359 dirty_ram_abs);
1361 } while (!pages && again);
1363 last_seen_block = pss.block;
1364 last_offset = pss.offset;
1366 return pages;
1369 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1371 uint64_t pages = size / TARGET_PAGE_SIZE;
1372 if (zero) {
1373 acct_info.dup_pages += pages;
1374 } else {
1375 acct_info.norm_pages += pages;
1376 bytes_transferred += size;
1377 qemu_update_position(f, size);
1381 static ram_addr_t ram_save_remaining(void)
1383 return migration_dirty_pages;
1386 uint64_t ram_bytes_remaining(void)
1388 return ram_save_remaining() * TARGET_PAGE_SIZE;
1391 uint64_t ram_bytes_transferred(void)
1393 return bytes_transferred;
1396 uint64_t ram_bytes_total(void)
1398 RAMBlock *block;
1399 uint64_t total = 0;
1401 rcu_read_lock();
1402 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1403 total += block->used_length;
1404 rcu_read_unlock();
1405 return total;
1408 void free_xbzrle_decoded_buf(void)
1410 g_free(xbzrle_decoded_buf);
1411 xbzrle_decoded_buf = NULL;
1414 static void migration_bitmap_free(struct BitmapRcu *bmap)
1416 g_free(bmap->bmap);
1417 g_free(bmap->unsentmap);
1418 g_free(bmap);
1421 static void ram_migration_cleanup(void *opaque)
1423 /* The caller must hold the iothread lock or be in a bh, so there is
1424 * no write race against this migration_bitmap
1426 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1427 atomic_rcu_set(&migration_bitmap_rcu, NULL);
1428 if (bitmap) {
1429 memory_global_dirty_log_stop();
1430 call_rcu(bitmap, migration_bitmap_free, rcu);
1433 XBZRLE_cache_lock();
1434 if (XBZRLE.cache) {
1435 cache_fini(XBZRLE.cache);
1436 g_free(XBZRLE.encoded_buf);
1437 g_free(XBZRLE.current_buf);
1438 XBZRLE.cache = NULL;
1439 XBZRLE.encoded_buf = NULL;
1440 XBZRLE.current_buf = NULL;
1442 XBZRLE_cache_unlock();
1445 static void reset_ram_globals(void)
1447 last_seen_block = NULL;
1448 last_sent_block = NULL;
1449 last_offset = 0;
1450 last_version = ram_list.version;
1451 ram_bulk_stage = true;
1454 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1456 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1458 /* called in qemu main thread, so there is
1459 * no writing race against this migration_bitmap
1461 if (migration_bitmap_rcu) {
1462 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1463 bitmap = g_new(struct BitmapRcu, 1);
1464 bitmap->bmap = bitmap_new(new);
1466 /* prevent bits in migration_bitmap from being set
1467 * by migration_bitmap_sync_range() at the same time.
1468 * It is safe for migration if a migration_bitmap bit is cleared
1469 * at the same time.
1471 qemu_mutex_lock(&migration_bitmap_mutex);
1472 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1473 bitmap_set(bitmap->bmap, old, new - old);
1475 /* We don't have a way to safely extend the sentmap
1476 * with RCU; so mark it as missing, entry to postcopy
1477 * will fail.
1479 bitmap->unsentmap = NULL;
1481 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1482 qemu_mutex_unlock(&migration_bitmap_mutex);
1483 migration_dirty_pages += new - old;
1484 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1489 * 'expected' is the value you expect the bitmap mostly to be full
1490 * of; it won't bother printing lines that are all this value.
1491 * If 'todump' is null the migration bitmap is dumped.
1493 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1495 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1497 int64_t cur;
1498 int64_t linelen = 128;
1499 char linebuf[129];
1501 if (!todump) {
1502 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1505 for (cur = 0; cur < ram_pages; cur += linelen) {
1506 int64_t curb;
1507 bool found = false;
1509 * Last line; catch the case where the line length
1510 * is longer than remaining ram
1512 if (cur + linelen > ram_pages) {
1513 linelen = ram_pages - cur;
1515 for (curb = 0; curb < linelen; curb++) {
1516 bool thisbit = test_bit(cur + curb, todump);
1517 linebuf[curb] = thisbit ? '1' : '.';
1518 found = found || (thisbit != expected);
1520 if (found) {
1521 linebuf[curb] = '\0';
1522 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1527 /* **** functions for postcopy ***** */
1530 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1531 * Note: At this point the 'unsentmap' is the processed bitmap combined
1532 * with the dirtymap; so a '1' means it's either dirty or unsent.
1533 * start,length: Indexes into the bitmap for the first bit
1534 * representing the named block and length in target-pages
1536 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1537 PostcopyDiscardState *pds,
1538 unsigned long start,
1539 unsigned long length)
1541 unsigned long end = start + length; /* one after the end */
1542 unsigned long current;
1543 unsigned long *unsentmap;
1545 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1546 for (current = start; current < end; ) {
1547 unsigned long one = find_next_bit(unsentmap, end, current);
1549 if (one <= end) {
1550 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1551 unsigned long discard_length;
1553 if (zero >= end) {
1554 discard_length = end - one;
1555 } else {
1556 discard_length = zero - one;
1558 if (discard_length) {
1559 postcopy_discard_send_range(ms, pds, one, discard_length);
1561 current = one + discard_length;
1562 } else {
1563 current = one;
1567 return 0;
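/*
 * Editor's note (worked example, not part of the original file): given an
 * unsentmap fragment 0011100110 starting at 'start', the loop above finds
 * the run of ones at start+2, sends a discard for (start+2, length 3),
 * resumes at start+5, finds the next run at start+7, sends (start+7,
 * length 2), and terminates once find_next_bit() runs off 'end'.
 */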
1571 * Utility for the outgoing postcopy code.
1572 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1573 * passing it bitmap indexes and name.
1574 * Returns: 0 on success
1575 * (qemu_ram_foreach_block ends up passing unscaled lengths
1576 * which would mean postcopy code would have to deal with target page)
1578 static int postcopy_each_ram_send_discard(MigrationState *ms)
1580 struct RAMBlock *block;
1581 int ret;
1583 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1584 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1585 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1586 first,
1587 block->idstr);
1590 * Postcopy sends chunks of bitmap over the wire, but it
1591 * just needs indexes at this point, avoids it having
1592 * target page specific code.
1594 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1595 block->used_length >> TARGET_PAGE_BITS);
1596 postcopy_discard_send_finish(ms, pds);
1597 if (ret) {
1598 return ret;
1602 return 0;
1606 * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
1607 * the two bitmaps, that are similar, but one is inverted.
1609 * We search for runs of target-pages that don't start or end on a
1610 * host page boundary;
1611 * unsent_pass=true: Cleans up partially unsent host pages by searching
1612 * the unsentmap
1613 * unsent_pass=false: Cleans up partially dirty host pages by searching
1614 * the main migration bitmap
1617 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1618 RAMBlock *block,
1619 PostcopyDiscardState *pds)
1621 unsigned long *bitmap;
1622 unsigned long *unsentmap;
1623 unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
1624 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1625 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1626 unsigned long last = first + (len - 1);
1627 unsigned long run_start;
1629 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1630 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1632 if (unsent_pass) {
1633 /* Find a sent page */
1634 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1635 } else {
1636 /* Find a dirty page */
1637 run_start = find_next_bit(bitmap, last + 1, first);
1640 while (run_start <= last) {
1641 bool do_fixup = false;
1642 unsigned long fixup_start_addr;
1643 unsigned long host_offset;
1646 * If the start of this run of pages is in the middle of a host
1647 * page, then we need to fixup this host page.
1649 host_offset = run_start % host_ratio;
1650 if (host_offset) {
1651 do_fixup = true;
1652 run_start -= host_offset;
1653 fixup_start_addr = run_start;
1654 /* For the next pass */
1655 run_start = run_start + host_ratio;
1656 } else {
1657 /* Find the end of this run */
1658 unsigned long run_end;
1659 if (unsent_pass) {
1660 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1661 } else {
1662 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1665 * If the end isn't at the start of a host page, then the
1666 * run doesn't finish at the end of a host page
1667 * and we need to discard.
1669 host_offset = run_end % host_ratio;
1670 if (host_offset) {
1671 do_fixup = true;
1672 fixup_start_addr = run_end - host_offset;
1674 * This host page has gone, the next loop iteration starts
1675 * from after the fixup
1677 run_start = fixup_start_addr + host_ratio;
1678 } else {
1680 * No discards on this iteration, next loop starts from
1681 * next sent/dirty page
1683 run_start = run_end + 1;
1687 if (do_fixup) {
1688 unsigned long page;
1690 /* Tell the destination to discard this page */
1691 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1692 /* For the unsent_pass we:
1693 * discard partially sent pages
1694 * For the !unsent_pass (dirty) we:
1695 * discard partially dirty pages that were sent
1696 * (any partially sent pages were already discarded
1697 * by the previous unsent_pass)
1699 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1700 host_ratio);
1703 /* Clean up the bitmap */
1704 for (page = fixup_start_addr;
1705 page < fixup_start_addr + host_ratio; page++) {
1706 /* All pages in this host page are now not sent */
1707 set_bit(page, unsentmap);
1710 * Remark them as dirty, updating the count for any pages
1711 * that weren't previously dirty.
1713 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1717 if (unsent_pass) {
1718 /* Find the next sent page for the next iteration */
1719 run_start = find_next_zero_bit(unsentmap, last + 1,
1720 run_start);
1721 } else {
1722 /* Find the next dirty page for the next iteration */
1723 run_start = find_next_bit(bitmap, last + 1, run_start);
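/*
 * Editor's note (worked example, not part of the original file): with
 * host_ratio 16 (64KiB host pages, 4KiB target pages), a run starting at
 * target page 35 has host_offset 3, so the pass rewinds to page 32, sends
 * a discard for that whole host page where required, marks its 16 target
 * pages as unsent and dirty again, and continues scanning from page 48.
 */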
1729 * Utility for the outgoing postcopy code.
1731 * Discard any partially sent host-page size chunks, mark any partially
1732 * dirty host-page size chunks as all dirty.
1734 * Returns: 0 on success
1736 static int postcopy_chunk_hostpages(MigrationState *ms)
1738 struct RAMBlock *block;
1740 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1741 /* Easy case - TPS==HPS - nothing to be done */
1742 return 0;
1745 /* Easiest way to make sure we don't resume in the middle of a host-page */
1746 last_seen_block = NULL;
1747 last_sent_block = NULL;
1748 last_offset = 0;
1750 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1751 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1753 PostcopyDiscardState *pds =
1754 postcopy_discard_send_init(ms, first, block->idstr);
1756 /* First pass: Discard all partially sent host pages */
1757 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1759 * Second pass: Ensure that all partially dirty host pages are made
1760 * fully dirty.
1762 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1764 postcopy_discard_send_finish(ms, pds);
1765 } /* ram_list loop */
1767 return 0;
1771 * Transmit the set of pages to be discarded after precopy to the target
1772 * these are pages that:
1773 * a) Have been previously transmitted but are now dirty again
1774 * b) Pages that have never been transmitted, this ensures that
1775 * any pages on the destination that have been mapped by background
1776 * tasks get discarded (transparent huge pages is the specific concern)
1777 * Hopefully this is pretty sparse
1779 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1781 int ret;
1782 unsigned long *bitmap, *unsentmap;
1784 rcu_read_lock();
1786 /* This should be our last sync, the src is now paused */
1787 migration_bitmap_sync();
1789 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1790 if (!unsentmap) {
1791 /* We don't have a safe way to resize the sentmap, so
1792 * if the bitmap was resized it will be NULL at this
1793 * point.
1795 error_report("migration ram resized during precopy phase");
1796 rcu_read_unlock();
1797 return -EINVAL;
1800 /* Deal with TPS != HPS */
1801 ret = postcopy_chunk_hostpages(ms);
1802 if (ret) {
1803 rcu_read_unlock();
1804 return ret;
1808 * Update the unsentmap to be unsentmap = unsentmap | dirty
1810 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1811 bitmap_or(unsentmap, unsentmap, bitmap,
1812 last_ram_offset() >> TARGET_PAGE_BITS);
1815 trace_ram_postcopy_send_discard_bitmap();
1816 #ifdef DEBUG_POSTCOPY
1817 ram_debug_dump_bitmap(unsentmap, true);
1818 #endif
1820 ret = postcopy_each_ram_send_discard(ms);
1821 rcu_read_unlock();
1823 return ret;
1827 * At the start of the postcopy phase of migration, any now-dirty
1828 * precopied pages are discarded.
1830 * start, length describe a byte address range within the RAMBlock
1832 * Returns 0 on success.
1834 int ram_discard_range(MigrationIncomingState *mis,
1835 const char *block_name,
1836 uint64_t start, size_t length)
1838 int ret = -1;
1840 rcu_read_lock();
1841 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1843 if (!rb) {
1844 error_report("ram_discard_range: Failed to find block '%s'",
1845 block_name);
1846 goto err;
1849 uint8_t *host_startaddr = rb->host + start;
1851 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1852 error_report("ram_discard_range: Unaligned start address: %p",
1853 host_startaddr);
1854 goto err;
1857 if ((start + length) <= rb->used_length) {
1858 uint8_t *host_endaddr = host_startaddr + length;
1859 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1860 error_report("ram_discard_range: Unaligned end address: %p",
1861 host_endaddr);
1862 goto err;
1864 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1865 } else {
1866 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
1867 "/%zx/" RAM_ADDR_FMT")",
1868 block_name, start, length, rb->used_length);
1871 err:
1872 rcu_read_unlock();
1874 return ret;
1878 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1879 * long-running RCU critical section. When rcu-reclaims in the code
1880 * start to become numerous it will be necessary to reduce the
1881 * granularity of these critical sections.
1884 static int ram_save_setup(QEMUFile *f, void *opaque)
1886 RAMBlock *block;
1887 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1889 dirty_rate_high_cnt = 0;
1890 bitmap_sync_count = 0;
1891 migration_bitmap_sync_init();
1892 qemu_mutex_init(&migration_bitmap_mutex);
1894 if (migrate_use_xbzrle()) {
1895 XBZRLE_cache_lock();
1896 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1897 TARGET_PAGE_SIZE,
1898 TARGET_PAGE_SIZE);
1899 if (!XBZRLE.cache) {
1900 XBZRLE_cache_unlock();
1901 error_report("Error creating cache");
1902 return -1;
1904 XBZRLE_cache_unlock();
1906 /* We prefer not to abort if there is no memory */
1907 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1908 if (!XBZRLE.encoded_buf) {
1909 error_report("Error allocating encoded_buf");
1910 return -1;
1913 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1914 if (!XBZRLE.current_buf) {
1915 error_report("Error allocating current_buf");
1916 g_free(XBZRLE.encoded_buf);
1917 XBZRLE.encoded_buf = NULL;
1918 return -1;
1921 acct_clear();
1924 /* For memory_global_dirty_log_start below. */
1925 qemu_mutex_lock_iothread();
1927 qemu_mutex_lock_ramlist();
1928 rcu_read_lock();
1929 bytes_transferred = 0;
1930 reset_ram_globals();
1932 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1933 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
1934 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1935 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1937 if (migrate_postcopy_ram()) {
1938 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1939 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1943 * Count the total number of pages used by ram blocks not including any
1944 * gaps due to alignment or unplugs.
1946 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1948 memory_global_dirty_log_start();
1949 migration_bitmap_sync();
1950 qemu_mutex_unlock_ramlist();
1951 qemu_mutex_unlock_iothread();
1953 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1955 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1956 qemu_put_byte(f, strlen(block->idstr));
1957 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1958 qemu_put_be64(f, block->used_length);
1961 rcu_read_unlock();
1963 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1964 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1966 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1968 return 0;
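/*
 * Editor's note (stream-layout sketch, not part of the original file): the
 * setup section emitted above is
 *
 *     be64  ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *     per RAMBlock:  u8 strlen(idstr), idstr bytes, be64 used_length
 *     be64  RAM_SAVE_FLAG_EOS
 *
 * which the destination uses to look up and sanity-check its own RAMBlocks
 * before any page data arrives.
 */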
1971 static int ram_save_iterate(QEMUFile *f, void *opaque)
1973 int ret;
1974 int i;
1975 int64_t t0;
1976 int pages_sent = 0;
1978 rcu_read_lock();
1979 if (ram_list.version != last_version) {
1980 reset_ram_globals();
1983 /* Read version before ram_list.blocks */
1984 smp_rmb();
1986 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1988 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1989 i = 0;
1990 while ((ret = qemu_file_rate_limit(f)) == 0) {
1991 int pages;
1993 pages = ram_find_and_save_block(f, false, &bytes_transferred);
1994 /* no more pages to send */
1995 if (pages == 0) {
1996 break;
1998 pages_sent += pages;
1999 acct_info.iterations++;
2001 /* we want to check in the 1st loop, just in case it was the 1st time
2002 and we had to sync the dirty bitmap.
2003 qemu_clock_get_ns() is a bit expensive, so we only check every few
2004 iterations
2006 if ((i & 63) == 0) {
2007 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2008 if (t1 > MAX_WAIT) {
2009 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
2010 t1, i);
2011 break;
2014 i++;
2016 flush_compressed_data(f);
2017 rcu_read_unlock();
2020 * Must occur before EOS (or any QEMUFile operation)
2021 * because of RDMA protocol.
2023 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2025 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2026 bytes_transferred += 8;
2028 ret = qemu_file_get_error(f);
2029 if (ret < 0) {
2030 return ret;
2033 return pages_sent;
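/*
 * Editorial note (not in the original source): ram_save_iterate() bounds each
 * round in two ways.  qemu_file_rate_limit() enforces the configured
 * bandwidth cap, and the (i & 63) == 0 test samples the realtime clock only
 * once per 64 loop iterations, so a round still ends after roughly MAX_WAIT
 * milliseconds even when the rate limiter never fires.  Assuming one page per
 * ram_find_and_save_block() call and 4 KiB target pages, that is about one
 * clock read per 256 KiB of transmitted RAM.
 */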
/* Called with iothread lock */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    rcu_read_lock();

    if (!migration_in_postcopy(migrate_get_current())) {
        migration_bitmap_sync();
    }

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(f, true, &bytes_transferred);
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(f);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *non_postcopiable_pending,
                             uint64_t *postcopiable_pending)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy(migrate_get_current()) &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync();
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }

    /* We can do postcopy, and all the data is postcopiable */
    *postcopiable_pending += remaining_size;
}
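/*
 * Editorial note (not in the original source): *non_postcopiable_pending is
 * left untouched here because, as the comment above says, every dirty RAM
 * page can be transferred after switching to postcopy; only the postcopiable
 * counter grows by the number of remaining dirty bytes.
 */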
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }
    loaded_data = xbzrle_decoded_buf;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
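/*
 * Editorial sketch (not in the original source): the XBZRLE record consumed
 * by load_xbzrle() above, as implied by its qemu_get_* calls:
 *
 *   u8     xh_flags   must equal ENCODING_FLAG_XBZRLE
 *   be16   xh_len     encoded length, at most TARGET_PAGE_SIZE
 *   bytes  xh_len bytes of encoded delta
 *
 * The delta is applied against the stale copy of the page already sitting at
 * 'host', so decoding only reproduces the current page if the destination
 * still holds the previously transferred contents.
 */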
/* Must be called from within a rcu critical section.
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * Read a RAMBlock ID from the stream f.
 *
 * f: Stream to read from
 * flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
                                              int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    return block;
}
static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}
/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
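/*
 * Editorial note (not in the original source): the is_zero_range() check
 * above skips the memset() when a zero page would land on memory that
 * already reads as zero.  The likely intent is to avoid touching (and thus
 * allocating or dirtying) destination pages the host has not populated yet,
 * so that zero-page migration does not inflate the destination's memory
 * footprint.
 */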
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;
            /* uncompress() can fail in some cases, especially when the page
             * was dirtied while it was being compressed.  That is not a
             * problem because the dirty page will be retransmitted and
             * uncompress() won't corrupt the data in other pages.
             */
            uncompress((Bytef *)des, &pagesize,
                       (const Bytef *)param->compbuf, len);

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
static void wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
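/*
 * Editorial sketch (not in the original source): the handshake between the
 * load path and the decompression workers, as implemented above.
 *
 *   ram_load() thread                        do_data_decompress() worker
 *   -----------------                        ---------------------------
 *   take decomp_done_lock
 *   find idx with done == true
 *   done = false
 *   take param->mutex                        sleeps on param->cond
 *   read compbuf from the stream,
 *   set des/len, signal param->cond  ----->  wakes, copies des/len, clears
 *   drop param->mutex, drop done_lock        des, runs uncompress(), then
 *                                            takes decomp_done_lock, sets
 *   (if no idle worker was found:    <-----  done = true and signals
 *    wait on decomp_done_cond)               decomp_done_cond
 *
 * 'done' is only ever touched under decomp_done_lock and the per-thread
 * request fields only under param->mutex, which is what lets
 * wait_for_decompress_done() reliably drain every worker before ram_load()
 * returns.
 */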
/*
 * Allocate data structures etc needed by incoming migration with postcopy-ram.
 * postcopy-ram's similarly named postcopy_ram_incoming_init does the work
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/*
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            /*
             * Postcopy requires that we place whole host pages atomically.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & ~qemu_host_page_mask);
            /* If all target pages of this host page are zero then we can
             * optimise the placement */
            if (!((uintptr_t)host & ~qemu_host_page_mask)) {
                all_zero = true;
            } else {
                /* not the 1st target page within the host page */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            ~qemu_host_page_mask) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        if (place_needed) {
            /* This gets called at the last target page in the host page */
            if (all_zero) {
                ret = postcopy_place_page_zero(mis,
                                               host + TARGET_PAGE_SIZE -
                                               qemu_host_page_size);
            } else {
                ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
                                               qemu_host_page_size,
                                          place_source);
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    return ret;
}
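/*
 * Editorial example (not in the original source): how the assembly above
 * behaves when the host page is larger than the target page, e.g. an assumed
 * 64 KiB host page built from 4 KiB target pages.  The first of the 16
 * records starts a fresh host page (all_zero is reset) and fills
 * postcopy_host_page at offset 0; each following record must be exactly
 * TARGET_PAGE_SIZE after the previous one and lands at the matching offset
 * inside the temporary page; on the 16th record place_needed becomes true and
 * the whole 64 KiB is installed atomically with postcopy_place_page() (or
 * postcopy_place_page_zero() if every component was a zero page) at
 * host + TARGET_PAGE_SIZE - qemu_host_page_size, i.e. the start of the host
 * page.  When host and target page sizes match, every record is placed
 * immediately.
 */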
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory must
     * be atomic
     */
    bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    wait_for_decompress_done();
    rcu_read_unlock();
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}
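/*
 * Editorial sketch (not in the original source): the per-record layout that
 * ram_load() and ram_load_postcopy() parse above, inferred from the code:
 *
 *   be64  addr | flags        flag bits live below TARGET_PAGE_SIZE
 *   if a page flag is set and RAM_SAVE_FLAG_CONTINUE is clear:
 *       u8 + bytes            RAMBlock idstr length and name
 *   then, depending on the flag:
 *       RAM_SAVE_FLAG_COMPRESS        u8 fill byte for the whole page
 *       RAM_SAVE_FLAG_PAGE            TARGET_PAGE_SIZE raw bytes
 *       RAM_SAVE_FLAG_COMPRESS_PAGE   be32 length + zlib-compressed data
 *       RAM_SAVE_FLAG_XBZRLE          header and delta read by load_xbzrle()
 *       RAM_SAVE_FLAG_EOS             end of this section, no payload
 */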
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
}