virtio-pci: address space translation service (ATS) support
[qemu.git] / migration / ram.c
bloba1c80890106075af37b2ce168b2b9affd0943bfb
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
38 #include "migration/migration.h"
39 #include "migration/postcopy-ram.h"
40 #include "exec/address-spaces.h"
41 #include "migration/page_cache.h"
42 #include "qemu/error-report.h"
43 #include "trace.h"
44 #include "exec/ram_addr.h"
45 #include "qemu/rcu_queue.h"
46 #include "migration/colo.h"
/*
 * Debug tracing helper.  Expands to a prefixed fprintf() on stdout when
 * DEBUG_MIGRATION_RAM is defined, and to an empty statement otherwise.
 */
#ifndef DEBUG_MIGRATION_RAM
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
#endif
56 static int dirty_rate_high_cnt;
58 static uint64_t bitmap_sync_count;
60 /***********************************************************/
61 /* ram save/restore */
/*
 * Flag bits OR-ed into the page offset that save_page_header() writes to
 * the stream.
 */
#define RAM_SAVE_FLAG_FULL          0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS      0x02
#define RAM_SAVE_FLAG_MEM_SIZE      0x04
#define RAM_SAVE_FLAG_PAGE          0x08
#define RAM_SAVE_FLAG_EOS           0x10
#define RAM_SAVE_FLAG_CONTINUE      0x20
#define RAM_SAVE_FLAG_XBZRLE        0x40
/* 0x80 is reserved in migration.h; new flags start at 0x100 */
#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
73 static uint8_t *ZERO_TARGET_PAGE;
/*
 * Thin wrapper around buffer_is_zero(): returns its verdict for the
 * @size bytes starting at @p.
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    bool zero = buffer_is_zero(p, size);

    return zero;
}
80 /* struct contains XBZRLE cache and a static page
81 used by the compression */
82 static struct {
83 /* buffer used for XBZRLE encoding */
84 uint8_t *encoded_buf;
85 /* buffer for storing page content */
86 uint8_t *current_buf;
87 /* Cache for XBZRLE, Protected by lock. */
88 PageCache *cache;
89 QemuMutex lock;
90 } XBZRLE;
92 /* buffer used for XBZRLE decoding */
93 static uint8_t *xbzrle_decoded_buf;
95 static void XBZRLE_cache_lock(void)
97 if (migrate_use_xbzrle())
98 qemu_mutex_lock(&XBZRLE.lock);
101 static void XBZRLE_cache_unlock(void)
103 if (migrate_use_xbzrle())
104 qemu_mutex_unlock(&XBZRLE.lock);
108 * called from qmp_migrate_set_cache_size in main thread, possibly while
109 * a migration is in progress.
110 * A running migration maybe using the cache and might finish during this
111 * call, hence changes to the cache are protected by XBZRLE.lock().
113 int64_t xbzrle_cache_resize(int64_t new_size)
115 PageCache *new_cache;
116 int64_t ret;
118 if (new_size < TARGET_PAGE_SIZE) {
119 return -1;
122 XBZRLE_cache_lock();
124 if (XBZRLE.cache != NULL) {
125 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
126 goto out_new_size;
128 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
129 TARGET_PAGE_SIZE);
130 if (!new_cache) {
131 error_report("Error creating cache");
132 ret = -1;
133 goto out;
136 cache_fini(XBZRLE.cache);
137 XBZRLE.cache = new_cache;
140 out_new_size:
141 ret = pow2floor(new_size);
142 out:
143 XBZRLE_cache_unlock();
144 return ret;
/* Counters used for accounting of migration statistics. */
struct AccountingInfo {
    uint64_t dup_pages;             /* pages accounted as duplicate/zero */
    uint64_t skipped_pages;
    uint64_t norm_pages;            /* pages accounted as normal */
    uint64_t iterations;
    uint64_t xbzrle_bytes;          /* bytes sent as XBZRLE pages */
    uint64_t xbzrle_pages;          /* pages sent XBZRLE-encoded */
    uint64_t xbzrle_cache_miss;     /* XBZRLE cache lookups that missed */
    double xbzrle_cache_miss_rate;  /* misses per iteration, last period */
    uint64_t xbzrle_overflows;      /* XBZRLE encodings that overflowed */
};
typedef struct AccountingInfo AccountingInfo;
160 static AccountingInfo acct_info;
162 static void acct_clear(void)
164 memset(&acct_info, 0, sizeof(acct_info));
167 uint64_t dup_mig_bytes_transferred(void)
169 return acct_info.dup_pages * TARGET_PAGE_SIZE;
172 uint64_t dup_mig_pages_transferred(void)
174 return acct_info.dup_pages;
177 uint64_t skipped_mig_bytes_transferred(void)
179 return acct_info.skipped_pages * TARGET_PAGE_SIZE;
182 uint64_t skipped_mig_pages_transferred(void)
184 return acct_info.skipped_pages;
187 uint64_t norm_mig_bytes_transferred(void)
189 return acct_info.norm_pages * TARGET_PAGE_SIZE;
192 uint64_t norm_mig_pages_transferred(void)
194 return acct_info.norm_pages;
197 uint64_t xbzrle_mig_bytes_transferred(void)
199 return acct_info.xbzrle_bytes;
202 uint64_t xbzrle_mig_pages_transferred(void)
204 return acct_info.xbzrle_pages;
207 uint64_t xbzrle_mig_pages_cache_miss(void)
209 return acct_info.xbzrle_cache_miss;
212 double xbzrle_mig_cache_miss_rate(void)
214 return acct_info.xbzrle_cache_miss_rate;
217 uint64_t xbzrle_mig_pages_overflow(void)
219 return acct_info.xbzrle_overflows;
222 /* This is the last block that we have visited searching for dirty pages
224 static RAMBlock *last_seen_block;
225 /* This is the last block from where we have sent data */
226 static RAMBlock *last_sent_block;
227 static ram_addr_t last_offset;
228 static QemuMutex migration_bitmap_mutex;
229 static uint64_t migration_dirty_pages;
230 static uint32_t last_version;
231 static bool ram_bulk_stage;
233 /* used by the search for pages to send */
234 struct PageSearchStatus {
235 /* Current block being searched */
236 RAMBlock *block;
237 /* Current offset to search from */
238 ram_addr_t offset;
239 /* Set once we wrap around */
240 bool complete_round;
242 typedef struct PageSearchStatus PageSearchStatus;
244 static struct BitmapRcu {
245 struct rcu_head rcu;
246 /* Main migration bitmap */
247 unsigned long *bmap;
248 /* bitmap of pages that haven't been sent even once
249 * only maintained and used in postcopy at the moment
250 * where it's used to send the dirtymap at the start
251 * of the postcopy phase
253 unsigned long *unsentmap;
254 } *migration_bitmap_rcu;
256 struct CompressParam {
257 bool done;
258 bool quit;
259 QEMUFile *file;
260 QemuMutex mutex;
261 QemuCond cond;
262 RAMBlock *block;
263 ram_addr_t offset;
265 typedef struct CompressParam CompressParam;
267 struct DecompressParam {
268 bool done;
269 bool quit;
270 QemuMutex mutex;
271 QemuCond cond;
272 void *des;
273 uint8_t *compbuf;
274 int len;
276 typedef struct DecompressParam DecompressParam;
278 static CompressParam *comp_param;
279 static QemuThread *compress_threads;
280 /* comp_done_cond is used to wake up the migration thread when
281 * one of the compression threads has finished the compression.
282 * comp_done_lock is used to co-work with comp_done_cond.
284 static QemuMutex comp_done_lock;
285 static QemuCond comp_done_cond;
286 /* The empty QEMUFileOps will be used by file in CompressParam */
287 static const QEMUFileOps empty_ops = { };
289 static bool compression_switch;
290 static DecompressParam *decomp_param;
291 static QemuThread *decompress_threads;
292 static QemuMutex decomp_done_lock;
293 static QemuCond decomp_done_cond;
295 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
296 ram_addr_t offset);
298 static void *do_data_compress(void *opaque)
300 CompressParam *param = opaque;
301 RAMBlock *block;
302 ram_addr_t offset;
304 qemu_mutex_lock(&param->mutex);
305 while (!param->quit) {
306 if (param->block) {
307 block = param->block;
308 offset = param->offset;
309 param->block = NULL;
310 qemu_mutex_unlock(&param->mutex);
312 do_compress_ram_page(param->file, block, offset);
314 qemu_mutex_lock(&comp_done_lock);
315 param->done = true;
316 qemu_cond_signal(&comp_done_cond);
317 qemu_mutex_unlock(&comp_done_lock);
319 qemu_mutex_lock(&param->mutex);
320 } else {
321 qemu_cond_wait(&param->cond, &param->mutex);
324 qemu_mutex_unlock(&param->mutex);
326 return NULL;
329 static inline void terminate_compression_threads(void)
331 int idx, thread_count;
333 thread_count = migrate_compress_threads();
334 for (idx = 0; idx < thread_count; idx++) {
335 qemu_mutex_lock(&comp_param[idx].mutex);
336 comp_param[idx].quit = true;
337 qemu_cond_signal(&comp_param[idx].cond);
338 qemu_mutex_unlock(&comp_param[idx].mutex);
342 void migrate_compress_threads_join(void)
344 int i, thread_count;
346 if (!migrate_use_compression()) {
347 return;
349 terminate_compression_threads();
350 thread_count = migrate_compress_threads();
351 for (i = 0; i < thread_count; i++) {
352 qemu_thread_join(compress_threads + i);
353 qemu_fclose(comp_param[i].file);
354 qemu_mutex_destroy(&comp_param[i].mutex);
355 qemu_cond_destroy(&comp_param[i].cond);
357 qemu_mutex_destroy(&comp_done_lock);
358 qemu_cond_destroy(&comp_done_cond);
359 g_free(compress_threads);
360 g_free(comp_param);
361 compress_threads = NULL;
362 comp_param = NULL;
365 void migrate_compress_threads_create(void)
367 int i, thread_count;
369 if (!migrate_use_compression()) {
370 return;
372 compression_switch = true;
373 thread_count = migrate_compress_threads();
374 compress_threads = g_new0(QemuThread, thread_count);
375 comp_param = g_new0(CompressParam, thread_count);
376 qemu_cond_init(&comp_done_cond);
377 qemu_mutex_init(&comp_done_lock);
378 for (i = 0; i < thread_count; i++) {
379 /* comp_param[i].file is just used as a dummy buffer to save data,
380 * set its ops to empty.
382 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
383 comp_param[i].done = true;
384 comp_param[i].quit = false;
385 qemu_mutex_init(&comp_param[i].mutex);
386 qemu_cond_init(&comp_param[i].cond);
387 qemu_thread_create(compress_threads + i, "compress",
388 do_data_compress, comp_param + i,
389 QEMU_THREAD_JOINABLE);
394 * save_page_header: Write page header to wire
396 * If this is the 1st block, it also writes the block identification
398 * Returns: Number of bytes written
400 * @f: QEMUFile where to send the data
401 * @block: block that contains the page we want to send
402 * @offset: offset inside the block for the page
403 * in the lower bits, it contains flags
405 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
407 size_t size, len;
409 qemu_put_be64(f, offset);
410 size = 8;
412 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
413 len = strlen(block->idstr);
414 qemu_put_byte(f, len);
415 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
416 size += 1 + len;
418 return size;
421 /* Reduce amount of guest cpu execution to hopefully slow down memory writes.
422 * If guest dirty memory rate is reduced below the rate at which we can
423 * transfer pages to the destination then we should be able to complete
424 * migration. Some workloads dirty memory way too fast and will not effectively
425 * converge, even with auto-converge.
427 static void mig_throttle_guest_down(void)
429 MigrationState *s = migrate_get_current();
430 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
431 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
433 /* We have not started throttling yet. Let's start it. */
434 if (!cpu_throttle_active()) {
435 cpu_throttle_set(pct_initial);
436 } else {
437 /* Throttling already on, just increase the rate */
438 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
442 /* Update the xbzrle cache to reflect a page that's been sent as all 0.
443 * The important thing is that a stale (not-yet-0'd) page be replaced
444 * by the new data.
445 * As a bonus, if the page wasn't in the cache it gets added so that
446 * when a small write is made into the 0'd page it gets XBZRLE sent
448 static void xbzrle_cache_zero_page(ram_addr_t current_addr)
450 if (ram_bulk_stage || !migrate_use_xbzrle()) {
451 return;
454 /* We don't care if this fails to allocate a new cache page
455 * as long as it updated an old one */
456 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
457 bitmap_sync_count);
460 #define ENCODING_FLAG_XBZRLE 0x1
463 * save_xbzrle_page: compress and send current page
465 * Returns: 1 means that we wrote the page
466 * 0 means that page is identical to the one already sent
467 * -1 means that xbzrle would be longer than normal
469 * @f: QEMUFile where to send the data
470 * @current_data:
471 * @current_addr:
472 * @block: block that contains the page we want to send
473 * @offset: offset inside the block for the page
474 * @last_stage: if we are at the completion stage
475 * @bytes_transferred: increase it with the number of transferred bytes
477 static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
478 ram_addr_t current_addr, RAMBlock *block,
479 ram_addr_t offset, bool last_stage,
480 uint64_t *bytes_transferred)
482 int encoded_len = 0, bytes_xbzrle;
483 uint8_t *prev_cached_page;
485 if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) {
486 acct_info.xbzrle_cache_miss++;
487 if (!last_stage) {
488 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
489 bitmap_sync_count) == -1) {
490 return -1;
491 } else {
492 /* update *current_data when the page has been
493 inserted into cache */
494 *current_data = get_cached_data(XBZRLE.cache, current_addr);
497 return -1;
500 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
502 /* save current buffer into memory */
503 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
505 /* XBZRLE encoding (if there is no overflow) */
506 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
507 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
508 TARGET_PAGE_SIZE);
509 if (encoded_len == 0) {
510 DPRINTF("Skipping unmodified page\n");
511 return 0;
512 } else if (encoded_len == -1) {
513 DPRINTF("Overflow\n");
514 acct_info.xbzrle_overflows++;
515 /* update data in the cache */
516 if (!last_stage) {
517 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
518 *current_data = prev_cached_page;
520 return -1;
523 /* we need to update the data in the cache, in order to get the same data */
524 if (!last_stage) {
525 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
528 /* Send XBZRLE based compressed page */
529 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
530 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
531 qemu_put_be16(f, encoded_len);
532 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
533 bytes_xbzrle += encoded_len + 1 + 2;
534 acct_info.xbzrle_pages++;
535 acct_info.xbzrle_bytes += bytes_xbzrle;
536 *bytes_transferred += bytes_xbzrle;
538 return 1;
541 /* Called with rcu_read_lock() to protect migration_bitmap
542 * rb: The RAMBlock to search for dirty pages in
543 * start: Start address (typically so we can continue from previous page)
544 * ram_addr_abs: Pointer into which to store the address of the dirty page
545 * within the global ram_addr space
547 * Returns: byte offset within memory region of the start of a dirty page
549 static inline
550 ram_addr_t migration_bitmap_find_dirty(RAMBlock *rb,
551 ram_addr_t start,
552 ram_addr_t *ram_addr_abs)
554 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
555 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
556 uint64_t rb_size = rb->used_length;
557 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
558 unsigned long *bitmap;
560 unsigned long next;
562 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
563 if (ram_bulk_stage && nr > base) {
564 next = nr + 1;
565 } else {
566 next = find_next_bit(bitmap, size, nr);
569 *ram_addr_abs = next << TARGET_PAGE_BITS;
570 return (next - base) << TARGET_PAGE_BITS;
573 static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
575 bool ret;
576 int nr = addr >> TARGET_PAGE_BITS;
577 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
579 ret = test_and_clear_bit(nr, bitmap);
581 if (ret) {
582 migration_dirty_pages--;
584 return ret;
587 static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
589 unsigned long *bitmap;
590 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
591 migration_dirty_pages +=
592 cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
595 /* Fix me: there are too many global variables used in migration process. */
596 static int64_t start_time;
597 static int64_t bytes_xfer_prev;
598 static int64_t num_dirty_pages_period;
599 static uint64_t xbzrle_cache_miss_prev;
600 static uint64_t iterations_prev;
602 static void migration_bitmap_sync_init(void)
604 start_time = 0;
605 bytes_xfer_prev = 0;
606 num_dirty_pages_period = 0;
607 xbzrle_cache_miss_prev = 0;
608 iterations_prev = 0;
611 static void migration_bitmap_sync(void)
613 RAMBlock *block;
614 uint64_t num_dirty_pages_init = migration_dirty_pages;
615 MigrationState *s = migrate_get_current();
616 int64_t end_time;
617 int64_t bytes_xfer_now;
619 bitmap_sync_count++;
621 if (!bytes_xfer_prev) {
622 bytes_xfer_prev = ram_bytes_transferred();
625 if (!start_time) {
626 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
629 trace_migration_bitmap_sync_start();
630 memory_global_dirty_log_sync();
632 qemu_mutex_lock(&migration_bitmap_mutex);
633 rcu_read_lock();
634 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
635 migration_bitmap_sync_range(block->offset, block->used_length);
637 rcu_read_unlock();
638 qemu_mutex_unlock(&migration_bitmap_mutex);
640 trace_migration_bitmap_sync_end(migration_dirty_pages
641 - num_dirty_pages_init);
642 num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
643 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
645 /* more than 1 second = 1000 millisecons */
646 if (end_time > start_time + 1000) {
647 if (migrate_auto_converge()) {
648 /* The following detection logic can be refined later. For now:
649 Check to see if the dirtied bytes is 50% more than the approx.
650 amount of bytes that just got transferred since the last time we
651 were in this routine. If that happens twice, start or increase
652 throttling */
653 bytes_xfer_now = ram_bytes_transferred();
655 if (s->dirty_pages_rate &&
656 (num_dirty_pages_period * TARGET_PAGE_SIZE >
657 (bytes_xfer_now - bytes_xfer_prev)/2) &&
658 (dirty_rate_high_cnt++ >= 2)) {
659 trace_migration_throttle();
660 dirty_rate_high_cnt = 0;
661 mig_throttle_guest_down();
663 bytes_xfer_prev = bytes_xfer_now;
666 if (migrate_use_xbzrle()) {
667 if (iterations_prev != acct_info.iterations) {
668 acct_info.xbzrle_cache_miss_rate =
669 (double)(acct_info.xbzrle_cache_miss -
670 xbzrle_cache_miss_prev) /
671 (acct_info.iterations - iterations_prev);
673 iterations_prev = acct_info.iterations;
674 xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
676 s->dirty_pages_rate = num_dirty_pages_period * 1000
677 / (end_time - start_time);
678 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
679 start_time = end_time;
680 num_dirty_pages_period = 0;
682 s->dirty_sync_count = bitmap_sync_count;
683 if (migrate_use_events()) {
684 qapi_event_send_migration_pass(bitmap_sync_count, NULL);
689 * save_zero_page: Send the zero page to the stream
691 * Returns: Number of pages written.
693 * @f: QEMUFile where to send the data
694 * @block: block that contains the page we want to send
695 * @offset: offset inside the block for the page
696 * @p: pointer to the page
697 * @bytes_transferred: increase it with the number of transferred bytes
699 static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
700 uint8_t *p, uint64_t *bytes_transferred)
702 int pages = -1;
704 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
705 acct_info.dup_pages++;
706 *bytes_transferred += save_page_header(f, block,
707 offset | RAM_SAVE_FLAG_COMPRESS);
708 qemu_put_byte(f, 0);
709 *bytes_transferred += 1;
710 pages = 1;
713 return pages;
717 * ram_save_page: Send the given page to the stream
719 * Returns: Number of pages written.
720 * < 0 - error
721 * >=0 - Number of pages written - this might legally be 0
722 * if xbzrle noticed the page was the same.
724 * @f: QEMUFile where to send the data
725 * @block: block that contains the page we want to send
726 * @offset: offset inside the block for the page
727 * @last_stage: if we are at the completion stage
728 * @bytes_transferred: increase it with the number of transferred bytes
730 static int ram_save_page(QEMUFile *f, PageSearchStatus *pss,
731 bool last_stage, uint64_t *bytes_transferred)
733 int pages = -1;
734 uint64_t bytes_xmit;
735 ram_addr_t current_addr;
736 uint8_t *p;
737 int ret;
738 bool send_async = true;
739 RAMBlock *block = pss->block;
740 ram_addr_t offset = pss->offset;
742 p = block->host + offset;
744 /* In doubt sent page as normal */
745 bytes_xmit = 0;
746 ret = ram_control_save_page(f, block->offset,
747 offset, TARGET_PAGE_SIZE, &bytes_xmit);
748 if (bytes_xmit) {
749 *bytes_transferred += bytes_xmit;
750 pages = 1;
753 XBZRLE_cache_lock();
755 current_addr = block->offset + offset;
757 if (block == last_sent_block) {
758 offset |= RAM_SAVE_FLAG_CONTINUE;
760 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
761 if (ret != RAM_SAVE_CONTROL_DELAYED) {
762 if (bytes_xmit > 0) {
763 acct_info.norm_pages++;
764 } else if (bytes_xmit == 0) {
765 acct_info.dup_pages++;
768 } else {
769 pages = save_zero_page(f, block, offset, p, bytes_transferred);
770 if (pages > 0) {
771 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
772 * page would be stale
774 xbzrle_cache_zero_page(current_addr);
775 } else if (!ram_bulk_stage &&
776 !migration_in_postcopy(migrate_get_current()) &&
777 migrate_use_xbzrle()) {
778 pages = save_xbzrle_page(f, &p, current_addr, block,
779 offset, last_stage, bytes_transferred);
780 if (!last_stage) {
781 /* Can't send this cached data async, since the cache page
782 * might get updated before it gets to the wire
784 send_async = false;
789 /* XBZRLE overflow or normal page */
790 if (pages == -1) {
791 *bytes_transferred += save_page_header(f, block,
792 offset | RAM_SAVE_FLAG_PAGE);
793 if (send_async) {
794 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
795 } else {
796 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
798 *bytes_transferred += TARGET_PAGE_SIZE;
799 pages = 1;
800 acct_info.norm_pages++;
803 XBZRLE_cache_unlock();
805 return pages;
808 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
809 ram_addr_t offset)
811 int bytes_sent, blen;
812 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
814 bytes_sent = save_page_header(f, block, offset |
815 RAM_SAVE_FLAG_COMPRESS_PAGE);
816 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
817 migrate_compress_level());
818 if (blen < 0) {
819 bytes_sent = 0;
820 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
821 error_report("compressed data failed!");
822 } else {
823 bytes_sent += blen;
826 return bytes_sent;
829 static uint64_t bytes_transferred;
831 static void flush_compressed_data(QEMUFile *f)
833 int idx, len, thread_count;
835 if (!migrate_use_compression()) {
836 return;
838 thread_count = migrate_compress_threads();
840 qemu_mutex_lock(&comp_done_lock);
841 for (idx = 0; idx < thread_count; idx++) {
842 while (!comp_param[idx].done) {
843 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
846 qemu_mutex_unlock(&comp_done_lock);
848 for (idx = 0; idx < thread_count; idx++) {
849 qemu_mutex_lock(&comp_param[idx].mutex);
850 if (!comp_param[idx].quit) {
851 len = qemu_put_qemu_file(f, comp_param[idx].file);
852 bytes_transferred += len;
854 qemu_mutex_unlock(&comp_param[idx].mutex);
858 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
859 ram_addr_t offset)
861 param->block = block;
862 param->offset = offset;
865 static int compress_page_with_multi_thread(QEMUFile *f, RAMBlock *block,
866 ram_addr_t offset,
867 uint64_t *bytes_transferred)
869 int idx, thread_count, bytes_xmit = -1, pages = -1;
871 thread_count = migrate_compress_threads();
872 qemu_mutex_lock(&comp_done_lock);
873 while (true) {
874 for (idx = 0; idx < thread_count; idx++) {
875 if (comp_param[idx].done) {
876 comp_param[idx].done = false;
877 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
878 qemu_mutex_lock(&comp_param[idx].mutex);
879 set_compress_params(&comp_param[idx], block, offset);
880 qemu_cond_signal(&comp_param[idx].cond);
881 qemu_mutex_unlock(&comp_param[idx].mutex);
882 pages = 1;
883 acct_info.norm_pages++;
884 *bytes_transferred += bytes_xmit;
885 break;
888 if (pages > 0) {
889 break;
890 } else {
891 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
894 qemu_mutex_unlock(&comp_done_lock);
896 return pages;
900 * ram_save_compressed_page: compress the given page and send it to the stream
902 * Returns: Number of pages written.
904 * @f: QEMUFile where to send the data
905 * @block: block that contains the page we want to send
906 * @offset: offset inside the block for the page
907 * @last_stage: if we are at the completion stage
908 * @bytes_transferred: increase it with the number of transferred bytes
910 static int ram_save_compressed_page(QEMUFile *f, PageSearchStatus *pss,
911 bool last_stage,
912 uint64_t *bytes_transferred)
914 int pages = -1;
915 uint64_t bytes_xmit = 0;
916 uint8_t *p;
917 int ret, blen;
918 RAMBlock *block = pss->block;
919 ram_addr_t offset = pss->offset;
921 p = block->host + offset;
923 ret = ram_control_save_page(f, block->offset,
924 offset, TARGET_PAGE_SIZE, &bytes_xmit);
925 if (bytes_xmit) {
926 *bytes_transferred += bytes_xmit;
927 pages = 1;
929 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
930 if (ret != RAM_SAVE_CONTROL_DELAYED) {
931 if (bytes_xmit > 0) {
932 acct_info.norm_pages++;
933 } else if (bytes_xmit == 0) {
934 acct_info.dup_pages++;
937 } else {
938 /* When starting the process of a new block, the first page of
939 * the block should be sent out before other pages in the same
940 * block, and all the pages in last block should have been sent
941 * out, keeping this order is important, because the 'cont' flag
942 * is used to avoid resending the block name.
944 if (block != last_sent_block) {
945 flush_compressed_data(f);
946 pages = save_zero_page(f, block, offset, p, bytes_transferred);
947 if (pages == -1) {
948 /* Make sure the first page is sent out before other pages */
949 bytes_xmit = save_page_header(f, block, offset |
950 RAM_SAVE_FLAG_COMPRESS_PAGE);
951 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
952 migrate_compress_level());
953 if (blen > 0) {
954 *bytes_transferred += bytes_xmit + blen;
955 acct_info.norm_pages++;
956 pages = 1;
957 } else {
958 qemu_file_set_error(f, blen);
959 error_report("compressed data failed!");
962 } else {
963 offset |= RAM_SAVE_FLAG_CONTINUE;
964 pages = save_zero_page(f, block, offset, p, bytes_transferred);
965 if (pages == -1) {
966 pages = compress_page_with_multi_thread(f, block, offset,
967 bytes_transferred);
972 return pages;
976 * Find the next dirty page and update any state associated with
977 * the search process.
979 * Returns: True if a page is found
981 * @f: Current migration stream.
982 * @pss: Data about the state of the current dirty page scan.
983 * @*again: Set to false if the search has scanned the whole of RAM
984 * *ram_addr_abs: Pointer into which to store the address of the dirty page
985 * within the global ram_addr space
987 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
988 bool *again, ram_addr_t *ram_addr_abs)
990 pss->offset = migration_bitmap_find_dirty(pss->block, pss->offset,
991 ram_addr_abs);
992 if (pss->complete_round && pss->block == last_seen_block &&
993 pss->offset >= last_offset) {
995 * We've been once around the RAM and haven't found anything.
996 * Give up.
998 *again = false;
999 return false;
1001 if (pss->offset >= pss->block->used_length) {
1002 /* Didn't find anything in this RAM Block */
1003 pss->offset = 0;
1004 pss->block = QLIST_NEXT_RCU(pss->block, next);
1005 if (!pss->block) {
1006 /* Hit the end of the list */
1007 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1008 /* Flag that we've looped */
1009 pss->complete_round = true;
1010 ram_bulk_stage = false;
1011 if (migrate_use_xbzrle()) {
1012 /* If xbzrle is on, stop using the data compression at this
1013 * point. In theory, xbzrle can do better than compression.
1015 flush_compressed_data(f);
1016 compression_switch = false;
1019 /* Didn't find anything this time, but try again on the new block */
1020 *again = true;
1021 return false;
1022 } else {
1023 /* Can go around again, but... */
1024 *again = true;
1025 /* We've found something so probably don't need to */
1026 return true;
1031 * Helper for 'get_queued_page' - gets a page off the queue
1032 * ms: MigrationState in
1033 * *offset: Used to return the offset within the RAMBlock
1034 * ram_addr_abs: global offset in the dirty/sent bitmaps
1036 * Returns: block (or NULL if none available)
1038 static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1039 ram_addr_t *ram_addr_abs)
1041 RAMBlock *block = NULL;
1043 qemu_mutex_lock(&ms->src_page_req_mutex);
1044 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1045 struct MigrationSrcPageRequest *entry =
1046 QSIMPLEQ_FIRST(&ms->src_page_requests);
1047 block = entry->rb;
1048 *offset = entry->offset;
1049 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1050 TARGET_PAGE_MASK;
1052 if (entry->len > TARGET_PAGE_SIZE) {
1053 entry->len -= TARGET_PAGE_SIZE;
1054 entry->offset += TARGET_PAGE_SIZE;
1055 } else {
1056 memory_region_unref(block->mr);
1057 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1058 g_free(entry);
1061 qemu_mutex_unlock(&ms->src_page_req_mutex);
1063 return block;
1067 * Unqueue a page from the queue fed by postcopy page requests; skips pages
1068 * that are already sent (!dirty)
1070 * ms: MigrationState in
1071 * pss: PageSearchStatus structure updated with found block/offset
1072 * ram_addr_abs: global offset in the dirty/sent bitmaps
1074 * Returns: true if a queued page is found
1076 static bool get_queued_page(MigrationState *ms, PageSearchStatus *pss,
1077 ram_addr_t *ram_addr_abs)
1079 RAMBlock *block;
1080 ram_addr_t offset;
1081 bool dirty;
1083 do {
1084 block = unqueue_page(ms, &offset, ram_addr_abs);
1086 * We're sending this page, and since it's postcopy nothing else
1087 * will dirty it, and we must make sure it doesn't get sent again
1088 * even if this queue request was received after the background
1089 * search already sent it.
1091 if (block) {
1092 unsigned long *bitmap;
1093 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1094 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1095 if (!dirty) {
1096 trace_get_queued_page_not_dirty(
1097 block->idstr, (uint64_t)offset,
1098 (uint64_t)*ram_addr_abs,
1099 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1100 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1101 } else {
1102 trace_get_queued_page(block->idstr,
1103 (uint64_t)offset,
1104 (uint64_t)*ram_addr_abs);
1108 } while (block && !dirty);
1110 if (block) {
1112 * As soon as we start servicing pages out of order, then we have
1113 * to kill the bulk stage, since the bulk stage assumes
1114 * in (migration_bitmap_find_and_reset_dirty) that every page is
1115 * dirty, that's no longer true.
1117 ram_bulk_stage = false;
1120 * We want the background search to continue from the queued page
1121 * since the guest is likely to want other pages near to the page
1122 * it just requested.
1124 pss->block = block;
1125 pss->offset = offset;
1128 return !!block;
1132 * flush_page_queue: Flush any remaining pages in the ram request queue
1133 * it should be empty at the end anyway, but in error cases there may be
1134 * some left.
1136 * ms: MigrationState
1138 void flush_page_queue(MigrationState *ms)
1140 struct MigrationSrcPageRequest *mspr, *next_mspr;
1141 /* This queue generally should be empty - but in the case of a failed
1142 * migration might have some droppings in.
1144 rcu_read_lock();
1145 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1146 memory_region_unref(mspr->rb->mr);
1147 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1148 g_free(mspr);
1150 rcu_read_unlock();
1154 * Queue the pages for transmission, e.g. a request from postcopy destination
1155 * ms: MigrationStatus in which the queue is held
1156 * rbname: The RAMBlock the request is for - may be NULL (to mean reuse last)
1157 * start: Offset from the start of the RAMBlock
1158 * len: Length (in bytes) to send
1159 * Return: 0 on success
1161 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1162 ram_addr_t start, ram_addr_t len)
1164 RAMBlock *ramblock;
1166 ms->postcopy_requests++;
1167 rcu_read_lock();
1168 if (!rbname) {
1169 /* Reuse last RAMBlock */
1170 ramblock = ms->last_req_rb;
1172 if (!ramblock) {
1174 * Shouldn't happen, we can't reuse the last RAMBlock if
1175 * it's the 1st request.
1177 error_report("ram_save_queue_pages no previous block");
1178 goto err;
1180 } else {
1181 ramblock = qemu_ram_block_by_name(rbname);
1183 if (!ramblock) {
1184 /* We shouldn't be asked for a non-existent RAMBlock */
1185 error_report("ram_save_queue_pages no block '%s'", rbname);
1186 goto err;
1188 ms->last_req_rb = ramblock;
1190 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1191 if (start+len > ramblock->used_length) {
1192 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1193 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1194 __func__, start, len, ramblock->used_length);
1195 goto err;
1198 struct MigrationSrcPageRequest *new_entry =
1199 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1200 new_entry->rb = ramblock;
1201 new_entry->offset = start;
1202 new_entry->len = len;
1204 memory_region_ref(ramblock->mr);
1205 qemu_mutex_lock(&ms->src_page_req_mutex);
1206 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1207 qemu_mutex_unlock(&ms->src_page_req_mutex);
1208 rcu_read_unlock();
1210 return 0;
1212 err:
1213 rcu_read_unlock();
1214 return -1;
1218 * ram_save_target_page: Save one target page
1221 * @f: QEMUFile where to send the data
1222 * @block: pointer to block that contains the page we want to send
1223 * @offset: offset inside the block for the page;
1224 * @last_stage: if we are at the completion stage
1225 * @bytes_transferred: increase it with the number of transferred bytes
1226 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1228 * Returns: Number of pages written.
1230 static int ram_save_target_page(MigrationState *ms, QEMUFile *f,
1231 PageSearchStatus *pss,
1232 bool last_stage,
1233 uint64_t *bytes_transferred,
1234 ram_addr_t dirty_ram_abs)
1236 int res = 0;
1238 /* Check the pages is dirty and if it is send it */
1239 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1240 unsigned long *unsentmap;
1241 if (compression_switch && migrate_use_compression()) {
1242 res = ram_save_compressed_page(f, pss,
1243 last_stage,
1244 bytes_transferred);
1245 } else {
1246 res = ram_save_page(f, pss, last_stage,
1247 bytes_transferred);
1250 if (res < 0) {
1251 return res;
1253 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1254 if (unsentmap) {
1255 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1257 /* Only update last_sent_block if a block was actually sent; xbzrle
1258 * might have decided the page was identical so didn't bother writing
1259 * to the stream.
1261 if (res > 0) {
1262 last_sent_block = pss->block;
1266 return res;
1270 * ram_save_host_page: Starting at *offset send pages up to the end
1271 * of the current host page. It's valid for the initial
1272 * offset to point into the middle of a host page
1273 * in which case the remainder of the hostpage is sent.
1274 * Only dirty target pages are sent.
1276 * Returns: Number of pages written.
1278 * @f: QEMUFile where to send the data
1279 * @block: pointer to block that contains the page we want to send
1280 * @offset: offset inside the block for the page; updated to last target page
1281 * sent
1282 * @last_stage: if we are at the completion stage
1283 * @bytes_transferred: increase it with the number of transferred bytes
1284 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1286 static int ram_save_host_page(MigrationState *ms, QEMUFile *f,
1287 PageSearchStatus *pss,
1288 bool last_stage,
1289 uint64_t *bytes_transferred,
1290 ram_addr_t dirty_ram_abs)
1292 int tmppages, pages = 0;
1293 do {
1294 tmppages = ram_save_target_page(ms, f, pss, last_stage,
1295 bytes_transferred, dirty_ram_abs);
1296 if (tmppages < 0) {
1297 return tmppages;
1300 pages += tmppages;
1301 pss->offset += TARGET_PAGE_SIZE;
1302 dirty_ram_abs += TARGET_PAGE_SIZE;
1303 } while (pss->offset & (qemu_host_page_size - 1));
1305 /* The offset we leave with is the last one we looked at */
1306 pss->offset -= TARGET_PAGE_SIZE;
1307 return pages;
1311 * ram_find_and_save_block: Finds a dirty page and sends it to f
1313 * Called within an RCU critical section.
1315 * Returns: The number of pages written
1316 * 0 means no dirty pages
1318 * @f: QEMUFile where to send the data
1319 * @last_stage: if we are at the completion stage
1320 * @bytes_transferred: increase it with the number of transferred bytes
1322 * On systems where host-page-size > target-page-size it will send all the
1323 * pages in a host page that are dirty.
1326 static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
1327 uint64_t *bytes_transferred)
1329 PageSearchStatus pss;
1330 MigrationState *ms = migrate_get_current();
1331 int pages = 0;
1332 bool again, found;
1333 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1334 ram_addr_t space */
1336 pss.block = last_seen_block;
1337 pss.offset = last_offset;
1338 pss.complete_round = false;
1340 if (!pss.block) {
1341 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1344 do {
1345 again = true;
1346 found = get_queued_page(ms, &pss, &dirty_ram_abs);
1348 if (!found) {
1349 /* priority queue empty, so just search for something dirty */
1350 found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
1353 if (found) {
1354 pages = ram_save_host_page(ms, f, &pss,
1355 last_stage, bytes_transferred,
1356 dirty_ram_abs);
1358 } while (!pages && again);
1360 last_seen_block = pss.block;
1361 last_offset = pss.offset;
1363 return pages;
1366 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1368 uint64_t pages = size / TARGET_PAGE_SIZE;
1369 if (zero) {
1370 acct_info.dup_pages += pages;
1371 } else {
1372 acct_info.norm_pages += pages;
1373 bytes_transferred += size;
1374 qemu_update_position(f, size);
1378 static ram_addr_t ram_save_remaining(void)
1380 return migration_dirty_pages;
1383 uint64_t ram_bytes_remaining(void)
1385 return ram_save_remaining() * TARGET_PAGE_SIZE;
1388 uint64_t ram_bytes_transferred(void)
1390 return bytes_transferred;
1393 uint64_t ram_bytes_total(void)
1395 RAMBlock *block;
1396 uint64_t total = 0;
1398 rcu_read_lock();
1399 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1400 total += block->used_length;
1401 rcu_read_unlock();
1402 return total;
1405 void free_xbzrle_decoded_buf(void)
1407 g_free(xbzrle_decoded_buf);
1408 xbzrle_decoded_buf = NULL;
1411 static void migration_bitmap_free(struct BitmapRcu *bmap)
1413 g_free(bmap->bmap);
1414 g_free(bmap->unsentmap);
1415 g_free(bmap);
1418 static void ram_migration_cleanup(void *opaque)
1420 /* caller have hold iothread lock or is in a bh, so there is
1421 * no writing race against this migration_bitmap
1423 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1424 atomic_rcu_set(&migration_bitmap_rcu, NULL);
1425 if (bitmap) {
1426 memory_global_dirty_log_stop();
1427 call_rcu(bitmap, migration_bitmap_free, rcu);
1430 XBZRLE_cache_lock();
1431 if (XBZRLE.cache) {
1432 cache_fini(XBZRLE.cache);
1433 g_free(XBZRLE.encoded_buf);
1434 g_free(XBZRLE.current_buf);
1435 g_free(ZERO_TARGET_PAGE);
1436 XBZRLE.cache = NULL;
1437 XBZRLE.encoded_buf = NULL;
1438 XBZRLE.current_buf = NULL;
1440 XBZRLE_cache_unlock();
1443 static void reset_ram_globals(void)
1445 last_seen_block = NULL;
1446 last_sent_block = NULL;
1447 last_offset = 0;
1448 last_version = ram_list.version;
1449 ram_bulk_stage = true;
1452 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1454 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1456 /* called in qemu main thread, so there is
1457 * no writing race against this migration_bitmap
1459 if (migration_bitmap_rcu) {
1460 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1461 bitmap = g_new(struct BitmapRcu, 1);
1462 bitmap->bmap = bitmap_new(new);
1464 /* prevent migration_bitmap content from being set bit
1465 * by migration_bitmap_sync_range() at the same time.
1466 * it is safe to migration if migration_bitmap is cleared bit
1467 * at the same time.
1469 qemu_mutex_lock(&migration_bitmap_mutex);
1470 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1471 bitmap_set(bitmap->bmap, old, new - old);
1473 /* We don't have a way to safely extend the sentmap
1474 * with RCU; so mark it as missing, entry to postcopy
1475 * will fail.
1477 bitmap->unsentmap = NULL;
1479 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1480 qemu_mutex_unlock(&migration_bitmap_mutex);
1481 migration_dirty_pages += new - old;
1482 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1487 * 'expected' is the value you expect the bitmap mostly to be full
1488 * of; it won't bother printing lines that are all this value.
1489 * If 'todump' is null the migration bitmap is dumped.
1491 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1493 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1495 int64_t cur;
1496 int64_t linelen = 128;
1497 char linebuf[129];
1499 if (!todump) {
1500 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1503 for (cur = 0; cur < ram_pages; cur += linelen) {
1504 int64_t curb;
1505 bool found = false;
1507 * Last line; catch the case where the line length
1508 * is longer than remaining ram
1510 if (cur + linelen > ram_pages) {
1511 linelen = ram_pages - cur;
1513 for (curb = 0; curb < linelen; curb++) {
1514 bool thisbit = test_bit(cur + curb, todump);
1515 linebuf[curb] = thisbit ? '1' : '.';
1516 found = found || (thisbit != expected);
1518 if (found) {
1519 linebuf[curb] = '\0';
1520 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1525 /* **** functions for postcopy ***** */
1528 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1529 * Note: At this point the 'unsentmap' is the processed bitmap combined
1530 * with the dirtymap; so a '1' means it's either dirty or unsent.
1531 * start,length: Indexes into the bitmap for the first bit
1532 * representing the named block and length in target-pages
1534 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1535 PostcopyDiscardState *pds,
1536 unsigned long start,
1537 unsigned long length)
1539 unsigned long end = start + length; /* one after the end */
1540 unsigned long current;
1541 unsigned long *unsentmap;
1543 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1544 for (current = start; current < end; ) {
1545 unsigned long one = find_next_bit(unsentmap, end, current);
1547 if (one <= end) {
1548 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1549 unsigned long discard_length;
1551 if (zero >= end) {
1552 discard_length = end - one;
1553 } else {
1554 discard_length = zero - one;
1556 if (discard_length) {
1557 postcopy_discard_send_range(ms, pds, one, discard_length);
1559 current = one + discard_length;
1560 } else {
1561 current = one;
1565 return 0;
1569 * Utility for the outgoing postcopy code.
1570 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1571 * passing it bitmap indexes and name.
1572 * Returns: 0 on success
1573 * (qemu_ram_foreach_block ends up passing unscaled lengths
1574 * which would mean postcopy code would have to deal with target page)
1576 static int postcopy_each_ram_send_discard(MigrationState *ms)
1578 struct RAMBlock *block;
1579 int ret;
1581 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1582 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1583 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1584 first,
1585 block->idstr);
1588 * Postcopy sends chunks of bitmap over the wire, but it
1589 * just needs indexes at this point, avoids it having
1590 * target page specific code.
1592 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1593 block->used_length >> TARGET_PAGE_BITS);
1594 postcopy_discard_send_finish(ms, pds);
1595 if (ret) {
1596 return ret;
1600 return 0;
1604 * Helper for postcopy_chunk_hostpages; it's called twice to cleanup
1605 * the two bitmaps, that are similar, but one is inverted.
1607 * We search for runs of target-pages that don't start or end on a
1608 * host page boundary;
1609 * unsent_pass=true: Cleans up partially unsent host pages by searching
1610 * the unsentmap
1611 * unsent_pass=false: Cleans up partially dirty host pages by searching
1612 * the main migration bitmap
1615 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1616 RAMBlock *block,
1617 PostcopyDiscardState *pds)
1619 unsigned long *bitmap;
1620 unsigned long *unsentmap;
1621 unsigned int host_ratio = qemu_host_page_size / TARGET_PAGE_SIZE;
1622 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1623 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1624 unsigned long last = first + (len - 1);
1625 unsigned long run_start;
1627 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1628 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1630 if (unsent_pass) {
1631 /* Find a sent page */
1632 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1633 } else {
1634 /* Find a dirty page */
1635 run_start = find_next_bit(bitmap, last + 1, first);
1638 while (run_start <= last) {
1639 bool do_fixup = false;
1640 unsigned long fixup_start_addr;
1641 unsigned long host_offset;
1644 * If the start of this run of pages is in the middle of a host
1645 * page, then we need to fixup this host page.
1647 host_offset = run_start % host_ratio;
1648 if (host_offset) {
1649 do_fixup = true;
1650 run_start -= host_offset;
1651 fixup_start_addr = run_start;
1652 /* For the next pass */
1653 run_start = run_start + host_ratio;
1654 } else {
1655 /* Find the end of this run */
1656 unsigned long run_end;
1657 if (unsent_pass) {
1658 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1659 } else {
1660 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1663 * If the end isn't at the start of a host page, then the
1664 * run doesn't finish at the end of a host page
1665 * and we need to discard.
1667 host_offset = run_end % host_ratio;
1668 if (host_offset) {
1669 do_fixup = true;
1670 fixup_start_addr = run_end - host_offset;
1672 * This host page has gone, the next loop iteration starts
1673 * from after the fixup
1675 run_start = fixup_start_addr + host_ratio;
1676 } else {
1678 * No discards on this iteration, next loop starts from
1679 * next sent/dirty page
1681 run_start = run_end + 1;
1685 if (do_fixup) {
1686 unsigned long page;
1688 /* Tell the destination to discard this page */
1689 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1690 /* For the unsent_pass we:
1691 * discard partially sent pages
1692 * For the !unsent_pass (dirty) we:
1693 * discard partially dirty pages that were sent
1694 * (any partially sent pages were already discarded
1695 * by the previous unsent_pass)
1697 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1698 host_ratio);
1701 /* Clean up the bitmap */
1702 for (page = fixup_start_addr;
1703 page < fixup_start_addr + host_ratio; page++) {
1704 /* All pages in this host page are now not sent */
1705 set_bit(page, unsentmap);
1708 * Remark them as dirty, updating the count for any pages
1709 * that weren't previously dirty.
1711 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1715 if (unsent_pass) {
1716 /* Find the next sent page for the next iteration */
1717 run_start = find_next_zero_bit(unsentmap, last + 1,
1718 run_start);
1719 } else {
1720 /* Find the next dirty page for the next iteration */
1721 run_start = find_next_bit(bitmap, last + 1, run_start);
1727 * Utility for the outgoing postcopy code.
1729 * Discard any partially sent host-page size chunks, mark any partially
1730 * dirty host-page size chunks as all dirty.
1732 * Returns: 0 on success
1734 static int postcopy_chunk_hostpages(MigrationState *ms)
1736 struct RAMBlock *block;
1738 if (qemu_host_page_size == TARGET_PAGE_SIZE) {
1739 /* Easy case - TPS==HPS - nothing to be done */
1740 return 0;
1743 /* Easiest way to make sure we don't resume in the middle of a host-page */
1744 last_seen_block = NULL;
1745 last_sent_block = NULL;
1746 last_offset = 0;
1748 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1749 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1751 PostcopyDiscardState *pds =
1752 postcopy_discard_send_init(ms, first, block->idstr);
1754 /* First pass: Discard all partially sent host pages */
1755 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1757 * Second pass: Ensure that all partially dirty host pages are made
1758 * fully dirty.
1760 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1762 postcopy_discard_send_finish(ms, pds);
1763 } /* ram_list loop */
1765 return 0;
1769 * Transmit the set of pages to be discarded after precopy to the target
1770 * these are pages that:
1771 * a) Have been previously transmitted but are now dirty again
1772 * b) Pages that have never been transmitted, this ensures that
1773 * any pages on the destination that have been mapped by background
1774 * tasks get discarded (transparent huge pages is the specific concern)
1775 * Hopefully this is pretty sparse
1777 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1779 int ret;
1780 unsigned long *bitmap, *unsentmap;
1782 rcu_read_lock();
1784 /* This should be our last sync, the src is now paused */
1785 migration_bitmap_sync();
1787 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1788 if (!unsentmap) {
1789 /* We don't have a safe way to resize the sentmap, so
1790 * if the bitmap was resized it will be NULL at this
1791 * point.
1793 error_report("migration ram resized during precopy phase");
1794 rcu_read_unlock();
1795 return -EINVAL;
1798 /* Deal with TPS != HPS */
1799 ret = postcopy_chunk_hostpages(ms);
1800 if (ret) {
1801 rcu_read_unlock();
1802 return ret;
1806 * Update the unsentmap to be unsentmap = unsentmap | dirty
1808 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1809 bitmap_or(unsentmap, unsentmap, bitmap,
1810 last_ram_offset() >> TARGET_PAGE_BITS);
1813 trace_ram_postcopy_send_discard_bitmap();
1814 #ifdef DEBUG_POSTCOPY
1815 ram_debug_dump_bitmap(unsentmap, true);
1816 #endif
1818 ret = postcopy_each_ram_send_discard(ms);
1819 rcu_read_unlock();
1821 return ret;
1825 * At the start of the postcopy phase of migration, any now-dirty
1826 * precopied pages are discarded.
1828 * start, length describe a byte address range within the RAMBlock
1830 * Returns 0 on success.
1832 int ram_discard_range(MigrationIncomingState *mis,
1833 const char *block_name,
1834 uint64_t start, size_t length)
1836 int ret = -1;
1838 rcu_read_lock();
1839 RAMBlock *rb = qemu_ram_block_by_name(block_name);
1841 if (!rb) {
1842 error_report("ram_discard_range: Failed to find block '%s'",
1843 block_name);
1844 goto err;
1847 uint8_t *host_startaddr = rb->host + start;
1849 if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
1850 error_report("ram_discard_range: Unaligned start address: %p",
1851 host_startaddr);
1852 goto err;
1855 if ((start + length) <= rb->used_length) {
1856 uint8_t *host_endaddr = host_startaddr + length;
1857 if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
1858 error_report("ram_discard_range: Unaligned end address: %p",
1859 host_endaddr);
1860 goto err;
1862 ret = postcopy_ram_discard_range(mis, host_startaddr, length);
1863 } else {
1864 error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
1865 "/%zx/" RAM_ADDR_FMT")",
1866 block_name, start, length, rb->used_length);
1869 err:
1870 rcu_read_unlock();
1872 return ret;
1875 static int ram_save_init_globals(void)
1877 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1879 dirty_rate_high_cnt = 0;
1880 bitmap_sync_count = 0;
1881 migration_bitmap_sync_init();
1882 qemu_mutex_init(&migration_bitmap_mutex);
1884 if (migrate_use_xbzrle()) {
1885 XBZRLE_cache_lock();
1886 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1887 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1888 TARGET_PAGE_SIZE,
1889 TARGET_PAGE_SIZE);
1890 if (!XBZRLE.cache) {
1891 XBZRLE_cache_unlock();
1892 error_report("Error creating cache");
1893 return -1;
1895 XBZRLE_cache_unlock();
1897 /* We prefer not to abort if there is no memory */
1898 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1899 if (!XBZRLE.encoded_buf) {
1900 error_report("Error allocating encoded_buf");
1901 return -1;
1904 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1905 if (!XBZRLE.current_buf) {
1906 error_report("Error allocating current_buf");
1907 g_free(XBZRLE.encoded_buf);
1908 XBZRLE.encoded_buf = NULL;
1909 return -1;
1912 acct_clear();
1915 /* For memory_global_dirty_log_start below. */
1916 qemu_mutex_lock_iothread();
1918 qemu_mutex_lock_ramlist();
1919 rcu_read_lock();
1920 bytes_transferred = 0;
1921 reset_ram_globals();
1923 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1924 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
1925 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
1926 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
1928 if (migrate_postcopy_ram()) {
1929 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
1930 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
1934 * Count the total number of pages used by ram blocks not including any
1935 * gaps due to alignment or unplugs.
1937 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1939 memory_global_dirty_log_start();
1940 migration_bitmap_sync();
1941 qemu_mutex_unlock_ramlist();
1942 qemu_mutex_unlock_iothread();
1943 rcu_read_unlock();
1945 return 0;
1948 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1949 * long-running RCU critical section. When rcu-reclaims in the code
1950 * start to become numerous it will be necessary to reduce the
1951 * granularity of these critical sections.
1954 static int ram_save_setup(QEMUFile *f, void *opaque)
1956 RAMBlock *block;
1958 /* migration has already setup the bitmap, reuse it. */
1959 if (!migration_in_colo_state()) {
1960 if (ram_save_init_globals() < 0) {
1961 return -1;
1965 rcu_read_lock();
1967 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1969 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1970 qemu_put_byte(f, strlen(block->idstr));
1971 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1972 qemu_put_be64(f, block->used_length);
1975 rcu_read_unlock();
1977 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1978 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1980 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1982 return 0;
1985 static int ram_save_iterate(QEMUFile *f, void *opaque)
1987 int ret;
1988 int i;
1989 int64_t t0;
1990 int done = 0;
1992 rcu_read_lock();
1993 if (ram_list.version != last_version) {
1994 reset_ram_globals();
1997 /* Read version before ram_list.blocks */
1998 smp_rmb();
2000 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2002 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2003 i = 0;
2004 while ((ret = qemu_file_rate_limit(f)) == 0) {
2005 int pages;
2007 pages = ram_find_and_save_block(f, false, &bytes_transferred);
2008 /* no more pages to sent */
2009 if (pages == 0) {
2010 done = 1;
2011 break;
2013 acct_info.iterations++;
2015 /* we want to check in the 1st loop, just in case it was the 1st time
2016 and we had to sync the dirty bitmap.
2017 qemu_get_clock_ns() is a bit expensive, so we only check each some
2018 iterations
2020 if ((i & 63) == 0) {
2021 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2022 if (t1 > MAX_WAIT) {
2023 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
2024 t1, i);
2025 break;
2028 i++;
2030 flush_compressed_data(f);
2031 rcu_read_unlock();
2034 * Must occur before EOS (or any QEMUFile operation)
2035 * because of RDMA protocol.
2037 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2039 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2040 bytes_transferred += 8;
2042 ret = qemu_file_get_error(f);
2043 if (ret < 0) {
2044 return ret;
2047 return done;
2050 /* Called with iothread lock */
2051 static int ram_save_complete(QEMUFile *f, void *opaque)
2053 rcu_read_lock();
2055 if (!migration_in_postcopy(migrate_get_current())) {
2056 migration_bitmap_sync();
2059 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2061 /* try transferring iterative blocks of memory */
2063 /* flush all remaining blocks regardless of rate limiting */
2064 while (true) {
2065 int pages;
2067 pages = ram_find_and_save_block(f, !migration_in_colo_state(),
2068 &bytes_transferred);
2069 /* no more blocks to sent */
2070 if (pages == 0) {
2071 break;
2075 flush_compressed_data(f);
2076 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2078 rcu_read_unlock();
2080 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2082 return 0;
2085 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2086 uint64_t *non_postcopiable_pending,
2087 uint64_t *postcopiable_pending)
2089 uint64_t remaining_size;
2091 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2093 if (!migration_in_postcopy(migrate_get_current()) &&
2094 remaining_size < max_size) {
2095 qemu_mutex_lock_iothread();
2096 rcu_read_lock();
2097 migration_bitmap_sync();
2098 rcu_read_unlock();
2099 qemu_mutex_unlock_iothread();
2100 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2103 /* We can do postcopy, and all the data is postcopiable */
2104 *postcopiable_pending += remaining_size;
2107 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2109 unsigned int xh_len;
2110 int xh_flags;
2111 uint8_t *loaded_data;
2113 if (!xbzrle_decoded_buf) {
2114 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2116 loaded_data = xbzrle_decoded_buf;
2118 /* extract RLE header */
2119 xh_flags = qemu_get_byte(f);
2120 xh_len = qemu_get_be16(f);
2122 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2123 error_report("Failed to load XBZRLE page - wrong compression!");
2124 return -1;
2127 if (xh_len > TARGET_PAGE_SIZE) {
2128 error_report("Failed to load XBZRLE page - len overflow!");
2129 return -1;
2131 /* load data and decode */
2132 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2134 /* decode RLE */
2135 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2136 TARGET_PAGE_SIZE) == -1) {
2137 error_report("Failed to load XBZRLE page - decode error!");
2138 return -1;
2141 return 0;
2144 /* Must be called from within a rcu critical section.
2145 * Returns a pointer from within the RCU-protected ram_list.
2148 * Read a RAMBlock ID from the stream f.
2150 * f: Stream to read from
2151 * flags: Page flags (mostly to see if it's a continuation of previous block)
2153 static inline RAMBlock *ram_block_from_stream(QEMUFile *f,
2154 int flags)
2156 static RAMBlock *block = NULL;
2157 char id[256];
2158 uint8_t len;
2160 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2161 if (!block) {
2162 error_report("Ack, bad migration stream!");
2163 return NULL;
2165 return block;
2168 len = qemu_get_byte(f);
2169 qemu_get_buffer(f, (uint8_t *)id, len);
2170 id[len] = 0;
2172 block = qemu_ram_block_by_name(id);
2173 if (!block) {
2174 error_report("Can't find block %s", id);
2175 return NULL;
2178 return block;
2181 static inline void *host_from_ram_block_offset(RAMBlock *block,
2182 ram_addr_t offset)
2184 if (!offset_in_ramblock(block, offset)) {
2185 return NULL;
2188 return block->host + offset;
/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: start of the destination range
 * @ch:   byte value to fill the range with
 * @size: length of the range in bytes
 *
 * The fill is skipped only when @ch is 0 and the range already reads as
 * all zeroes.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* Already zero: leave the memory untouched */
        return;
    }

    memset(host, ch, size);
}
2202 static void *do_data_decompress(void *opaque)
2204 DecompressParam *param = opaque;
2205 unsigned long pagesize;
2206 uint8_t *des;
2207 int len;
2209 qemu_mutex_lock(&param->mutex);
2210 while (!param->quit) {
2211 if (param->des) {
2212 des = param->des;
2213 len = param->len;
2214 param->des = 0;
2215 qemu_mutex_unlock(&param->mutex);
2217 pagesize = TARGET_PAGE_SIZE;
2218 /* uncompress() will return failed in some case, especially
2219 * when the page is dirted when doing the compression, it's
2220 * not a problem because the dirty page will be retransferred
2221 * and uncompress() won't break the data in other pages.
2223 uncompress((Bytef *)des, &pagesize,
2224 (const Bytef *)param->compbuf, len);
2226 qemu_mutex_lock(&decomp_done_lock);
2227 param->done = true;
2228 qemu_cond_signal(&decomp_done_cond);
2229 qemu_mutex_unlock(&decomp_done_lock);
2231 qemu_mutex_lock(&param->mutex);
2232 } else {
2233 qemu_cond_wait(&param->cond, &param->mutex);
2236 qemu_mutex_unlock(&param->mutex);
2238 return NULL;
2241 static void wait_for_decompress_done(void)
2243 int idx, thread_count;
2245 if (!migrate_use_compression()) {
2246 return;
2249 thread_count = migrate_decompress_threads();
2250 qemu_mutex_lock(&decomp_done_lock);
2251 for (idx = 0; idx < thread_count; idx++) {
2252 while (!decomp_param[idx].done) {
2253 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2256 qemu_mutex_unlock(&decomp_done_lock);
2259 void migrate_decompress_threads_create(void)
2261 int i, thread_count;
2263 thread_count = migrate_decompress_threads();
2264 decompress_threads = g_new0(QemuThread, thread_count);
2265 decomp_param = g_new0(DecompressParam, thread_count);
2266 qemu_mutex_init(&decomp_done_lock);
2267 qemu_cond_init(&decomp_done_cond);
2268 for (i = 0; i < thread_count; i++) {
2269 qemu_mutex_init(&decomp_param[i].mutex);
2270 qemu_cond_init(&decomp_param[i].cond);
2271 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2272 decomp_param[i].done = true;
2273 decomp_param[i].quit = false;
2274 qemu_thread_create(decompress_threads + i, "decompress",
2275 do_data_decompress, decomp_param + i,
2276 QEMU_THREAD_JOINABLE);
2280 void migrate_decompress_threads_join(void)
2282 int i, thread_count;
2284 thread_count = migrate_decompress_threads();
2285 for (i = 0; i < thread_count; i++) {
2286 qemu_mutex_lock(&decomp_param[i].mutex);
2287 decomp_param[i].quit = true;
2288 qemu_cond_signal(&decomp_param[i].cond);
2289 qemu_mutex_unlock(&decomp_param[i].mutex);
2291 for (i = 0; i < thread_count; i++) {
2292 qemu_thread_join(decompress_threads + i);
2293 qemu_mutex_destroy(&decomp_param[i].mutex);
2294 qemu_cond_destroy(&decomp_param[i].cond);
2295 g_free(decomp_param[i].compbuf);
2297 g_free(decompress_threads);
2298 g_free(decomp_param);
2299 decompress_threads = NULL;
2300 decomp_param = NULL;
2303 static void decompress_data_with_multi_threads(QEMUFile *f,
2304 void *host, int len)
2306 int idx, thread_count;
2308 thread_count = migrate_decompress_threads();
2309 qemu_mutex_lock(&decomp_done_lock);
2310 while (true) {
2311 for (idx = 0; idx < thread_count; idx++) {
2312 if (decomp_param[idx].done) {
2313 decomp_param[idx].done = false;
2314 qemu_mutex_lock(&decomp_param[idx].mutex);
2315 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2316 decomp_param[idx].des = host;
2317 decomp_param[idx].len = len;
2318 qemu_cond_signal(&decomp_param[idx].cond);
2319 qemu_mutex_unlock(&decomp_param[idx].mutex);
2320 break;
2323 if (idx < thread_count) {
2324 break;
2325 } else {
2326 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2329 qemu_mutex_unlock(&decomp_done_lock);
2333 * Allocate data structures etc needed by incoming migration with postcopy-ram
2334 * postcopy-ram's similarly names postcopy_ram_incoming_init does the work
2336 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2338 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2340 return postcopy_ram_incoming_init(mis, ram_pages);
2344 * Called in postcopy mode by ram_load().
2345 * rcu_read_lock is taken prior to this being called.
2347 static int ram_load_postcopy(QEMUFile *f)
2349 int flags = 0, ret = 0;
2350 bool place_needed = false;
2351 bool matching_page_sizes = qemu_host_page_size == TARGET_PAGE_SIZE;
2352 MigrationIncomingState *mis = migration_incoming_get_current();
2353 /* Temporary page that is later 'placed' */
2354 void *postcopy_host_page = postcopy_get_tmp_page(mis);
2355 void *last_host = NULL;
2356 bool all_zero = false;
2358 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2359 ram_addr_t addr;
2360 void *host = NULL;
2361 void *page_buffer = NULL;
2362 void *place_source = NULL;
2363 uint8_t ch;
2365 addr = qemu_get_be64(f);
2366 flags = addr & ~TARGET_PAGE_MASK;
2367 addr &= TARGET_PAGE_MASK;
2369 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2370 place_needed = false;
2371 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2372 RAMBlock *block = ram_block_from_stream(f, flags);
2374 host = host_from_ram_block_offset(block, addr);
2375 if (!host) {
2376 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2377 ret = -EINVAL;
2378 break;
2381 * Postcopy requires that we place whole host pages atomically.
2382 * To make it atomic, the data is read into a temporary page
2383 * that's moved into place later.
2384 * The migration protocol uses, possibly smaller, target-pages
2385 * however the source ensures it always sends all the components
2386 * of a host page in order.
2388 page_buffer = postcopy_host_page +
2389 ((uintptr_t)host & ~qemu_host_page_mask);
2390 /* If all TP are zero then we can optimise the place */
2391 if (!((uintptr_t)host & ~qemu_host_page_mask)) {
2392 all_zero = true;
2393 } else {
2394 /* not the 1st TP within the HP */
2395 if (host != (last_host + TARGET_PAGE_SIZE)) {
2396 error_report("Non-sequential target page %p/%p",
2397 host, last_host);
2398 ret = -EINVAL;
2399 break;
2405 * If it's the last part of a host page then we place the host
2406 * page
2408 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2409 ~qemu_host_page_mask) == 0;
2410 place_source = postcopy_host_page;
2412 last_host = host;
2414 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2415 case RAM_SAVE_FLAG_COMPRESS:
2416 ch = qemu_get_byte(f);
2417 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2418 if (ch) {
2419 all_zero = false;
2421 break;
2423 case RAM_SAVE_FLAG_PAGE:
2424 all_zero = false;
2425 if (!place_needed || !matching_page_sizes) {
2426 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2427 } else {
2428 /* Avoids the qemu_file copy during postcopy, which is
2429 * going to do a copy later; can only do it when we
2430 * do this read in one go (matching page sizes)
2432 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2433 TARGET_PAGE_SIZE);
2435 break;
2436 case RAM_SAVE_FLAG_EOS:
2437 /* normal exit */
2438 break;
2439 default:
2440 error_report("Unknown combination of migration flags: %#x"
2441 " (postcopy mode)", flags);
2442 ret = -EINVAL;
2445 if (place_needed) {
2446 /* This gets called at the last target page in the host page */
2447 if (all_zero) {
2448 ret = postcopy_place_page_zero(mis,
2449 host + TARGET_PAGE_SIZE -
2450 qemu_host_page_size);
2451 } else {
2452 ret = postcopy_place_page(mis, host + TARGET_PAGE_SIZE -
2453 qemu_host_page_size,
2454 place_source);
2457 if (!ret) {
2458 ret = qemu_file_get_error(f);
2462 return ret;
2465 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2467 int flags = 0, ret = 0;
2468 static uint64_t seq_iter;
2469 int len = 0;
2471 * If system is running in postcopy mode, page inserts to host memory must
2472 * be atomic
2474 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2476 seq_iter++;
2478 if (version_id != 4) {
2479 ret = -EINVAL;
2482 /* This RCU critical section can be very long running.
2483 * When RCU reclaims in the code start to become numerous,
2484 * it will be necessary to reduce the granularity of this
2485 * critical section.
2487 rcu_read_lock();
2489 if (postcopy_running) {
2490 ret = ram_load_postcopy(f);
2493 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2494 ram_addr_t addr, total_ram_bytes;
2495 void *host = NULL;
2496 uint8_t ch;
2498 addr = qemu_get_be64(f);
2499 flags = addr & ~TARGET_PAGE_MASK;
2500 addr &= TARGET_PAGE_MASK;
2502 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2503 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2504 RAMBlock *block = ram_block_from_stream(f, flags);
2506 host = host_from_ram_block_offset(block, addr);
2507 if (!host) {
2508 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2509 ret = -EINVAL;
2510 break;
2514 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2515 case RAM_SAVE_FLAG_MEM_SIZE:
2516 /* Synchronize RAM block list */
2517 total_ram_bytes = addr;
2518 while (!ret && total_ram_bytes) {
2519 RAMBlock *block;
2520 char id[256];
2521 ram_addr_t length;
2523 len = qemu_get_byte(f);
2524 qemu_get_buffer(f, (uint8_t *)id, len);
2525 id[len] = 0;
2526 length = qemu_get_be64(f);
2528 block = qemu_ram_block_by_name(id);
2529 if (block) {
2530 if (length != block->used_length) {
2531 Error *local_err = NULL;
2533 ret = qemu_ram_resize(block, length,
2534 &local_err);
2535 if (local_err) {
2536 error_report_err(local_err);
2539 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2540 block->idstr);
2541 } else {
2542 error_report("Unknown ramblock \"%s\", cannot "
2543 "accept migration", id);
2544 ret = -EINVAL;
2547 total_ram_bytes -= length;
2549 break;
2551 case RAM_SAVE_FLAG_COMPRESS:
2552 ch = qemu_get_byte(f);
2553 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2554 break;
2556 case RAM_SAVE_FLAG_PAGE:
2557 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2558 break;
2560 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2561 len = qemu_get_be32(f);
2562 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2563 error_report("Invalid compressed data length: %d", len);
2564 ret = -EINVAL;
2565 break;
2567 decompress_data_with_multi_threads(f, host, len);
2568 break;
2570 case RAM_SAVE_FLAG_XBZRLE:
2571 if (load_xbzrle(f, addr, host) < 0) {
2572 error_report("Failed to decompress XBZRLE page at "
2573 RAM_ADDR_FMT, addr);
2574 ret = -EINVAL;
2575 break;
2577 break;
2578 case RAM_SAVE_FLAG_EOS:
2579 /* normal exit */
2580 break;
2581 default:
2582 if (flags & RAM_SAVE_FLAG_HOOK) {
2583 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2584 } else {
2585 error_report("Unknown combination of migration flags: %#x",
2586 flags);
2587 ret = -EINVAL;
2590 if (!ret) {
2591 ret = qemu_file_get_error(f);
2595 wait_for_decompress_done();
2596 rcu_read_unlock();
2597 DPRINTF("Completed load of VM with exit code %d seq iteration "
2598 "%" PRIu64 "\n", ret, seq_iter);
2599 return ret;
2602 static SaveVMHandlers savevm_ram_handlers = {
2603 .save_live_setup = ram_save_setup,
2604 .save_live_iterate = ram_save_iterate,
2605 .save_live_complete_postcopy = ram_save_complete,
2606 .save_live_complete_precopy = ram_save_complete,
2607 .save_live_pending = ram_save_pending,
2608 .load_state = ram_load,
2609 .cleanup = ram_migration_cleanup,
2612 void ram_mig_init(void)
2614 qemu_mutex_init(&XBZRLE.lock);
2615 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);