1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
38 #include "xbzrle.h"
39 #include "ram.h"
40 #include "migration.h"
41 #include "migration/register.h"
42 #include "migration/misc.h"
43 #include "qemu-file.h"
44 #include "migration/vmstate.h"
45 #include "postcopy-ram.h"
46 #include "exec/address-spaces.h"
47 #include "migration/page_cache.h"
48 #include "qemu/error-report.h"
49 #include "trace.h"
50 #include "exec/ram_addr.h"
51 #include "qemu/rcu_queue.h"
52 #include "migration/colo.h"
54 /***********************************************************/
55 /* ram save/restore */
 57 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 58 * worked for pages that were filled with the same char. We switched
 59 * it to only search for the zero value. And to avoid confusion with
 60 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it. */
63 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
64 #define RAM_SAVE_FLAG_ZERO 0x02
65 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
66 #define RAM_SAVE_FLAG_PAGE 0x08
67 #define RAM_SAVE_FLAG_EOS 0x10
68 #define RAM_SAVE_FLAG_CONTINUE 0x20
69 #define RAM_SAVE_FLAG_XBZRLE 0x40
 70 /* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
71 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
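/*
 * Example of how these flags travel on the wire (a sketch based on the
 * save/load paths in this file): the flags are OR'ed into the low bits
 * of the page-aligned RAMBlock offset written by save_page_header(),
 * and split out again on the destination, e.g.:
 *
 *     uint64_t header = qemu_get_be64(f);
 *     int flags       = header & ~TARGET_PAGE_MASK;   // RAM_SAVE_FLAG_*
 *     ram_addr_t addr = header & TARGET_PAGE_MASK;    // page offset
 *
 * This works because all RAM_SAVE_FLAG_* values are smaller than the
 * target page size, so the low bits of a page-aligned offset are free
 * to carry them.
 */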
73 static inline bool is_zero_range(uint8_t *p, uint64_t size)
75 return buffer_is_zero(p, size);
78 XBZRLECacheStats xbzrle_counters;
80 /* struct contains XBZRLE cache and a static page
81 used by the compression */
82 static struct {
83 /* buffer used for XBZRLE encoding */
84 uint8_t *encoded_buf;
85 /* buffer for storing page content */
86 uint8_t *current_buf;
87 /* Cache for XBZRLE, Protected by lock. */
88 PageCache *cache;
89 QemuMutex lock;
90 /* it will store a page full of zeros */
91 uint8_t *zero_target_page;
92 } XBZRLE;
94 /* buffer used for XBZRLE decoding */
95 static uint8_t *xbzrle_decoded_buf;
97 static void XBZRLE_cache_lock(void)
99 if (migrate_use_xbzrle())
100 qemu_mutex_lock(&XBZRLE.lock);
103 static void XBZRLE_cache_unlock(void)
105 if (migrate_use_xbzrle())
106 qemu_mutex_unlock(&XBZRLE.lock);
110 * xbzrle_cache_resize: resize the xbzrle cache
112 * This function is called from qmp_migrate_set_cache_size in main
113 * thread, possibly while a migration is in progress. A running
114 * migration may be using the cache and might finish during this call,
115 * hence changes to the cache are protected by XBZRLE.lock().
117 * Returns the new_size or negative in case of error.
119 * @new_size: new cache size
121 int64_t xbzrle_cache_resize(int64_t new_size)
123 PageCache *new_cache;
124 int64_t ret;
126 if (new_size < TARGET_PAGE_SIZE) {
127 return -1;
130 XBZRLE_cache_lock();
132 if (XBZRLE.cache != NULL) {
133 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
134 goto out_new_size;
136 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
137 TARGET_PAGE_SIZE);
138 if (!new_cache) {
139 error_report("Error creating cache");
140 ret = -1;
141 goto out;
144 cache_fini(XBZRLE.cache);
145 XBZRLE.cache = new_cache;
148 out_new_size:
149 ret = pow2floor(new_size);
150 out:
151 XBZRLE_cache_unlock();
152 return ret;
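/*
 * Example: a caller asking for a cache size that is not a power of two,
 * say qmp_migrate_set_cache_size(1000000000), gets back
 * pow2floor(1000000000) == 536870912 (512 MiB), i.e. the returned cache
 * size is rounded down to a power of two.  Sizes below TARGET_PAGE_SIZE
 * are rejected with -1.
 */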
156 * An outstanding page request, on the source, having been received
157 * and queued
159 struct RAMSrcPageRequest {
160 RAMBlock *rb;
161 hwaddr offset;
162 hwaddr len;
164 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
167 /* State of RAM for migration */
168 struct RAMState {
169 /* QEMUFile used for this migration */
170 QEMUFile *f;
171 /* Last block that we have visited searching for dirty pages */
172 RAMBlock *last_seen_block;
173 /* Last block from where we have sent data */
174 RAMBlock *last_sent_block;
175 /* Last dirty target page we have sent */
176 ram_addr_t last_page;
177 /* last ram version we have seen */
178 uint32_t last_version;
179 /* We are in the first round */
180 bool ram_bulk_stage;
 181 /* How many times we have hit a period with too many dirty pages */
182 int dirty_rate_high_cnt;
183 /* these variables are used for bitmap sync */
184 /* last time we did a full bitmap_sync */
185 int64_t time_last_bitmap_sync;
186 /* bytes transferred at start_time */
187 uint64_t bytes_xfer_prev;
188 /* number of dirty pages since start_time */
189 uint64_t num_dirty_pages_period;
190 /* xbzrle misses since the beginning of the period */
191 uint64_t xbzrle_cache_miss_prev;
192 /* number of iterations at the beginning of period */
193 uint64_t iterations_prev;
194 /* Iterations since start */
195 uint64_t iterations;
 196 /* number of dirty bits in the bitmap */
 197 uint64_t migration_dirty_pages;
 198 /* protects modification of the bitmap */
 199 QemuMutex bitmap_mutex;
200 /* The RAMBlock used in the last src_page_requests */
201 RAMBlock *last_req_rb;
202 /* Queue of outstanding page requests from the destination */
203 QemuMutex src_page_req_mutex;
204 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
206 typedef struct RAMState RAMState;
208 static RAMState *ram_state;
210 uint64_t ram_bytes_remaining(void)
212 return ram_state->migration_dirty_pages * TARGET_PAGE_SIZE;
215 MigrationStats ram_counters;
217 /* used by the search for pages to send */
218 struct PageSearchStatus {
219 /* Current block being searched */
220 RAMBlock *block;
221 /* Current page to search from */
222 unsigned long page;
223 /* Set once we wrap around */
224 bool complete_round;
226 typedef struct PageSearchStatus PageSearchStatus;
228 struct CompressParam {
229 bool done;
230 bool quit;
231 QEMUFile *file;
232 QemuMutex mutex;
233 QemuCond cond;
234 RAMBlock *block;
235 ram_addr_t offset;
237 typedef struct CompressParam CompressParam;
239 struct DecompressParam {
240 bool done;
241 bool quit;
242 QemuMutex mutex;
243 QemuCond cond;
244 void *des;
245 uint8_t *compbuf;
246 int len;
248 typedef struct DecompressParam DecompressParam;
250 static CompressParam *comp_param;
251 static QemuThread *compress_threads;
252 /* comp_done_cond is used to wake up the migration thread when
253 * one of the compression threads has finished the compression.
 254 * comp_done_lock is used together with comp_done_cond.
256 static QemuMutex comp_done_lock;
257 static QemuCond comp_done_cond;
258 /* The empty QEMUFileOps will be used by file in CompressParam */
259 static const QEMUFileOps empty_ops = { };
261 static DecompressParam *decomp_param;
262 static QemuThread *decompress_threads;
263 static QemuMutex decomp_done_lock;
264 static QemuCond decomp_done_cond;
266 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
267 ram_addr_t offset);
269 static void *do_data_compress(void *opaque)
271 CompressParam *param = opaque;
272 RAMBlock *block;
273 ram_addr_t offset;
275 qemu_mutex_lock(&param->mutex);
276 while (!param->quit) {
277 if (param->block) {
278 block = param->block;
279 offset = param->offset;
280 param->block = NULL;
281 qemu_mutex_unlock(&param->mutex);
283 do_compress_ram_page(param->file, block, offset);
285 qemu_mutex_lock(&comp_done_lock);
286 param->done = true;
287 qemu_cond_signal(&comp_done_cond);
288 qemu_mutex_unlock(&comp_done_lock);
290 qemu_mutex_lock(&param->mutex);
291 } else {
292 qemu_cond_wait(&param->cond, &param->mutex);
295 qemu_mutex_unlock(&param->mutex);
297 return NULL;
300 static inline void terminate_compression_threads(void)
302 int idx, thread_count;
304 thread_count = migrate_compress_threads();
306 for (idx = 0; idx < thread_count; idx++) {
307 qemu_mutex_lock(&comp_param[idx].mutex);
308 comp_param[idx].quit = true;
309 qemu_cond_signal(&comp_param[idx].cond);
310 qemu_mutex_unlock(&comp_param[idx].mutex);
314 void migrate_compress_threads_join(void)
316 int i, thread_count;
318 if (!migrate_use_compression()) {
319 return;
321 terminate_compression_threads();
322 thread_count = migrate_compress_threads();
323 for (i = 0; i < thread_count; i++) {
324 qemu_thread_join(compress_threads + i);
325 qemu_fclose(comp_param[i].file);
326 qemu_mutex_destroy(&comp_param[i].mutex);
327 qemu_cond_destroy(&comp_param[i].cond);
329 qemu_mutex_destroy(&comp_done_lock);
330 qemu_cond_destroy(&comp_done_cond);
331 g_free(compress_threads);
332 g_free(comp_param);
333 compress_threads = NULL;
334 comp_param = NULL;
337 void migrate_compress_threads_create(void)
339 int i, thread_count;
341 if (!migrate_use_compression()) {
342 return;
344 thread_count = migrate_compress_threads();
345 compress_threads = g_new0(QemuThread, thread_count);
346 comp_param = g_new0(CompressParam, thread_count);
347 qemu_cond_init(&comp_done_cond);
348 qemu_mutex_init(&comp_done_lock);
349 for (i = 0; i < thread_count; i++) {
350 /* comp_param[i].file is just used as a dummy buffer to save data,
351 * set its ops to empty.
353 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
354 comp_param[i].done = true;
355 comp_param[i].quit = false;
356 qemu_mutex_init(&comp_param[i].mutex);
357 qemu_cond_init(&comp_param[i].cond);
358 qemu_thread_create(compress_threads + i, "compress",
359 do_data_compress, comp_param + i,
360 QEMU_THREAD_JOINABLE);
365 * save_page_header: write page header to wire
367 * If this is the 1st block, it also writes the block identification
369 * Returns the number of bytes written
371 * @f: QEMUFile where to send the data
372 * @block: block that contains the page we want to send
373 * @offset: offset inside the block for the page
374 * in the lower bits, it contains flags
376 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
377 ram_addr_t offset)
379 size_t size, len;
381 if (block == rs->last_sent_block) {
382 offset |= RAM_SAVE_FLAG_CONTINUE;
384 qemu_put_be64(f, offset);
385 size = 8;
387 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
388 len = strlen(block->idstr);
389 qemu_put_byte(f, len);
390 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
391 size += 1 + len;
392 rs->last_sent_block = block;
394 return size;
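/*
 * Resulting wire layout (sketch): the first page sent from a block
 * carries
 *     be64: offset | flags                      (8 bytes)
 *     u8:   strlen(idstr)                       (1 byte)
 *     idstr bytes, not NUL terminated           (len bytes)
 * so for a block named "pc.ram" the header is 8 + 1 + 6 = 15 bytes.
 * Every later page from the same block has RAM_SAVE_FLAG_CONTINUE set
 * and only the 8 byte word is written.
 */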
 398 * mig_throttle_guest_down: throttle down the guest
400 * Reduce amount of guest cpu execution to hopefully slow down memory
401 * writes. If guest dirty memory rate is reduced below the rate at
402 * which we can transfer pages to the destination then we should be
403 * able to complete migration. Some workloads dirty memory way too
404 * fast and will not effectively converge, even with auto-converge.
406 static void mig_throttle_guest_down(void)
408 MigrationState *s = migrate_get_current();
409 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
 410 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
412 /* We have not started throttling yet. Let's start it. */
413 if (!cpu_throttle_active()) {
414 cpu_throttle_set(pct_initial);
415 } else {
416 /* Throttling already on, just increase the rate */
 417 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
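/*
 * Example progression, assuming the default parameters of 20% initial
 * throttle and 10% increment: the first call sets the throttle to 20%,
 * and each further call (one per high-dirty-rate detection in
 * migration_bitmap_sync) raises it to 30%, 40%, ... until the dirty
 * rate finally drops below what the migration stream can transfer.
 */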
422 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
424 * @rs: current RAM state
425 * @current_addr: address for the zero page
427 * Update the xbzrle cache to reflect a page that's been sent as all 0.
428 * The important thing is that a stale (not-yet-0'd) page be replaced
429 * by the new data.
430 * As a bonus, if the page wasn't in the cache it gets added so that
431 * when a small write is made into the 0'd page it gets XBZRLE sent.
433 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
435 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
436 return;
439 /* We don't care if this fails to allocate a new cache page
440 * as long as it updated an old one */
441 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
442 ram_counters.dirty_sync_count);
445 #define ENCODING_FLAG_XBZRLE 0x1
448 * save_xbzrle_page: compress and send current page
450 * Returns: 1 means that we wrote the page
451 * 0 means that page is identical to the one already sent
452 * -1 means that xbzrle would be longer than normal
454 * @rs: current RAM state
455 * @current_data: pointer to the address of the page contents
456 * @current_addr: addr of the page
457 * @block: block that contains the page we want to send
458 * @offset: offset inside the block for the page
459 * @last_stage: if we are at the completion stage
461 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
462 ram_addr_t current_addr, RAMBlock *block,
463 ram_addr_t offset, bool last_stage)
465 int encoded_len = 0, bytes_xbzrle;
466 uint8_t *prev_cached_page;
468 if (!cache_is_cached(XBZRLE.cache, current_addr,
469 ram_counters.dirty_sync_count)) {
470 xbzrle_counters.cache_miss++;
471 if (!last_stage) {
472 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
473 ram_counters.dirty_sync_count) == -1) {
474 return -1;
475 } else {
476 /* update *current_data when the page has been
477 inserted into cache */
478 *current_data = get_cached_data(XBZRLE.cache, current_addr);
481 return -1;
484 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
486 /* save current buffer into memory */
487 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
489 /* XBZRLE encoding (if there is no overflow) */
490 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
491 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
492 TARGET_PAGE_SIZE);
493 if (encoded_len == 0) {
494 trace_save_xbzrle_page_skipping();
495 return 0;
496 } else if (encoded_len == -1) {
497 trace_save_xbzrle_page_overflow();
498 xbzrle_counters.overflow++;
499 /* update data in the cache */
500 if (!last_stage) {
501 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
502 *current_data = prev_cached_page;
504 return -1;
507 /* we need to update the data in the cache, in order to get the same data */
508 if (!last_stage) {
509 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
512 /* Send XBZRLE based compressed page */
513 bytes_xbzrle = save_page_header(rs, rs->f, block,
514 offset | RAM_SAVE_FLAG_XBZRLE);
515 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
516 qemu_put_be16(rs->f, encoded_len);
517 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
518 bytes_xbzrle += encoded_len + 1 + 2;
519 xbzrle_counters.pages++;
520 xbzrle_counters.bytes += bytes_xbzrle;
521 ram_counters.transferred += bytes_xbzrle;
523 return 1;
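/*
 * On the wire an XBZRLE page therefore looks like (sketch):
 *     save_page_header() with RAM_SAVE_FLAG_XBZRLE set
 *     u8:   ENCODING_FLAG_XBZRLE
 *     be16: encoded_len
 *     encoded_len bytes of XBZRLE-encoded delta
 * which is what the 'encoded_len + 1 + 2' accounting above reflects,
 * and what load_xbzrle() expects on the destination.
 */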
527 * migration_bitmap_find_dirty: find the next dirty page from start
529 * Called with rcu_read_lock() to protect migration_bitmap
531 * Returns the byte offset within memory region of the start of a dirty page
533 * @rs: current RAM state
534 * @rb: RAMBlock where to search for dirty pages
535 * @start: page where we start the search
537 static inline
538 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
539 unsigned long start)
541 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
542 unsigned long *bitmap = rb->bmap;
543 unsigned long next;
545 if (rs->ram_bulk_stage && start > 0) {
546 next = start + 1;
547 } else {
548 next = find_next_bit(bitmap, size, start);
551 return next;
554 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
555 RAMBlock *rb,
556 unsigned long page)
558 bool ret;
560 ret = test_and_clear_bit(page, rb->bmap);
562 if (ret) {
563 rs->migration_dirty_pages--;
565 return ret;
568 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
569 ram_addr_t start, ram_addr_t length)
571 rs->migration_dirty_pages +=
572 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
573 &rs->num_dirty_pages_period);
577 * ram_pagesize_summary: calculate all the pagesizes of a VM
579 * Returns a summary bitmap of the page sizes of all RAMBlocks
581 * For VMs with just normal pages this is equivalent to the host page
582 * size. If it's got some huge pages then it's the OR of all the
583 * different page sizes.
585 uint64_t ram_pagesize_summary(void)
587 RAMBlock *block;
588 uint64_t summary = 0;
590 RAMBLOCK_FOREACH(block) {
591 summary |= block->page_size;
594 return summary;
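/*
 * Example: a guest whose RAMBlocks are all backed by normal 4 KiB pages
 * plus one hugetlbfs-backed block using 2 MiB pages would report
 *     0x1000 | 0x200000 == 0x201000
 * while a VM with only normal pages reports just the host page size.
 */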
597 static void migration_bitmap_sync(RAMState *rs)
599 RAMBlock *block;
600 int64_t end_time;
601 uint64_t bytes_xfer_now;
603 ram_counters.dirty_sync_count++;
605 if (!rs->time_last_bitmap_sync) {
606 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
609 trace_migration_bitmap_sync_start();
610 memory_global_dirty_log_sync();
612 qemu_mutex_lock(&rs->bitmap_mutex);
613 rcu_read_lock();
614 RAMBLOCK_FOREACH(block) {
615 migration_bitmap_sync_range(rs, block, 0, block->used_length);
617 rcu_read_unlock();
618 qemu_mutex_unlock(&rs->bitmap_mutex);
620 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
622 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 624 /* more than 1 second = 1000 milliseconds */
625 if (end_time > rs->time_last_bitmap_sync + 1000) {
626 /* calculate period counters */
627 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
628 / (end_time - rs->time_last_bitmap_sync);
629 bytes_xfer_now = ram_counters.transferred;
631 if (migrate_auto_converge()) {
 632 /* The following detection logic can be refined later. For now:
 633 check whether the bytes dirtied in this period exceed 50% of the
 634 bytes that got transferred since the last time we were in this
 635 routine. If that happens twice, start or increase
 636 throttling */
638 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
639 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
640 (++rs->dirty_rate_high_cnt >= 2)) {
641 trace_migration_throttle();
642 rs->dirty_rate_high_cnt = 0;
643 mig_throttle_guest_down();
647 if (migrate_use_xbzrle()) {
648 if (rs->iterations_prev != rs->iterations) {
649 xbzrle_counters.cache_miss_rate =
650 (double)(xbzrle_counters.cache_miss -
651 rs->xbzrle_cache_miss_prev) /
652 (rs->iterations - rs->iterations_prev);
654 rs->iterations_prev = rs->iterations;
655 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
658 /* reset period counters */
659 rs->time_last_bitmap_sync = end_time;
660 rs->num_dirty_pages_period = 0;
661 rs->bytes_xfer_prev = bytes_xfer_now;
663 if (migrate_use_events()) {
664 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
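/*
 * Worked example of the auto-converge check in this function: if 300 MB
 * were transferred during the last ~1 s period but the guest dirtied
 * 200 MB of pages in the same period, then 200 MB > 300 MB / 2, so
 * dirty_rate_high_cnt is bumped; once that has happened twice,
 * mig_throttle_guest_down() is called and the counter resets.
 */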
669 * save_zero_page: send the zero page to the stream
 671 * Returns the number of pages written (1), or -1 if the page was not zero
673 * @rs: current RAM state
674 * @block: block that contains the page we want to send
675 * @offset: offset inside the block for the page
676 * @p: pointer to the page
678 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
679 uint8_t *p)
681 int pages = -1;
683 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
684 ram_counters.duplicate++;
685 ram_counters.transferred +=
686 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
687 qemu_put_byte(rs->f, 0);
688 ram_counters.transferred += 1;
689 pages = 1;
692 return pages;
695 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
697 if (!migrate_release_ram() || !migration_in_postcopy()) {
698 return;
701 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
705 * ram_save_page: send the given page to the stream
707 * Returns the number of pages written.
708 * < 0 - error
709 * >=0 - Number of pages written - this might legally be 0
710 * if xbzrle noticed the page was the same.
712 * @rs: current RAM state
713 * @block: block that contains the page we want to send
714 * @offset: offset inside the block for the page
715 * @last_stage: if we are at the completion stage
717 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
719 int pages = -1;
720 uint64_t bytes_xmit;
721 ram_addr_t current_addr;
722 uint8_t *p;
723 int ret;
724 bool send_async = true;
725 RAMBlock *block = pss->block;
726 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
728 p = block->host + offset;
729 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
 731 /* When in doubt, send the page as a normal page */
732 bytes_xmit = 0;
733 ret = ram_control_save_page(rs->f, block->offset,
734 offset, TARGET_PAGE_SIZE, &bytes_xmit);
735 if (bytes_xmit) {
736 ram_counters.transferred += bytes_xmit;
737 pages = 1;
740 XBZRLE_cache_lock();
742 current_addr = block->offset + offset;
744 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
745 if (ret != RAM_SAVE_CONTROL_DELAYED) {
746 if (bytes_xmit > 0) {
747 ram_counters.normal++;
748 } else if (bytes_xmit == 0) {
749 ram_counters.duplicate++;
752 } else {
753 pages = save_zero_page(rs, block, offset, p);
754 if (pages > 0) {
755 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
756 * page would be stale
758 xbzrle_cache_zero_page(rs, current_addr);
759 ram_release_pages(block->idstr, offset, pages);
760 } else if (!rs->ram_bulk_stage &&
761 !migration_in_postcopy() && migrate_use_xbzrle()) {
762 pages = save_xbzrle_page(rs, &p, current_addr, block,
763 offset, last_stage);
764 if (!last_stage) {
765 /* Can't send this cached data async, since the cache page
766 * might get updated before it gets to the wire
768 send_async = false;
773 /* XBZRLE overflow or normal page */
774 if (pages == -1) {
775 ram_counters.transferred +=
776 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
777 if (send_async) {
778 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
 779 migrate_release_ram() &&
780 migration_in_postcopy());
781 } else {
782 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
784 ram_counters.transferred += TARGET_PAGE_SIZE;
785 pages = 1;
786 ram_counters.normal++;
789 XBZRLE_cache_unlock();
791 return pages;
794 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
795 ram_addr_t offset)
797 RAMState *rs = ram_state;
798 int bytes_sent, blen;
799 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
801 bytes_sent = save_page_header(rs, f, block, offset |
802 RAM_SAVE_FLAG_COMPRESS_PAGE);
803 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
804 migrate_compress_level());
805 if (blen < 0) {
806 bytes_sent = 0;
807 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
808 error_report("compressed data failed!");
809 } else {
810 bytes_sent += blen;
811 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
814 return bytes_sent;
817 static void flush_compressed_data(RAMState *rs)
819 int idx, len, thread_count;
821 if (!migrate_use_compression()) {
822 return;
824 thread_count = migrate_compress_threads();
826 qemu_mutex_lock(&comp_done_lock);
827 for (idx = 0; idx < thread_count; idx++) {
828 while (!comp_param[idx].done) {
829 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
832 qemu_mutex_unlock(&comp_done_lock);
834 for (idx = 0; idx < thread_count; idx++) {
835 qemu_mutex_lock(&comp_param[idx].mutex);
836 if (!comp_param[idx].quit) {
837 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
838 ram_counters.transferred += len;
840 qemu_mutex_unlock(&comp_param[idx].mutex);
844 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
845 ram_addr_t offset)
847 param->block = block;
848 param->offset = offset;
851 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
852 ram_addr_t offset)
854 int idx, thread_count, bytes_xmit = -1, pages = -1;
856 thread_count = migrate_compress_threads();
857 qemu_mutex_lock(&comp_done_lock);
858 while (true) {
859 for (idx = 0; idx < thread_count; idx++) {
860 if (comp_param[idx].done) {
861 comp_param[idx].done = false;
862 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
863 qemu_mutex_lock(&comp_param[idx].mutex);
864 set_compress_params(&comp_param[idx], block, offset);
865 qemu_cond_signal(&comp_param[idx].cond);
866 qemu_mutex_unlock(&comp_param[idx].mutex);
867 pages = 1;
868 ram_counters.normal++;
869 ram_counters.transferred += bytes_xmit;
870 break;
873 if (pages > 0) {
874 break;
875 } else {
876 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
879 qemu_mutex_unlock(&comp_done_lock);
881 return pages;
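/*
 * In short, the handshake above is: the migration thread waits for a
 * worker whose 'done' flag is set, drains that worker's private
 * QEMUFile buffer into the real migration stream, hands it the next
 * (block, offset) under the worker's mutex and signals its condition
 * variable; if no worker is idle it sleeps on comp_done_cond until one
 * of them finishes.
 */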
885 * ram_save_compressed_page: compress the given page and send it to the stream
887 * Returns the number of pages written.
889 * @rs: current RAM state
890 * @block: block that contains the page we want to send
891 * @offset: offset inside the block for the page
892 * @last_stage: if we are at the completion stage
894 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
895 bool last_stage)
897 int pages = -1;
898 uint64_t bytes_xmit = 0;
899 uint8_t *p;
900 int ret, blen;
901 RAMBlock *block = pss->block;
902 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
904 p = block->host + offset;
906 ret = ram_control_save_page(rs->f, block->offset,
907 offset, TARGET_PAGE_SIZE, &bytes_xmit);
908 if (bytes_xmit) {
909 ram_counters.transferred += bytes_xmit;
910 pages = 1;
912 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
913 if (ret != RAM_SAVE_CONTROL_DELAYED) {
914 if (bytes_xmit > 0) {
915 ram_counters.normal++;
916 } else if (bytes_xmit == 0) {
917 ram_counters.duplicate++;
920 } else {
 921 /* When starting the process of a new block, the first page of
 922 * the block should be sent out before other pages in the same
 923 * block, and all the pages in the last block should have been sent
 924 * out. Keeping this order is important, because the 'cont' flag
 925 * is used to avoid resending the block name.
927 if (block != rs->last_sent_block) {
928 flush_compressed_data(rs);
929 pages = save_zero_page(rs, block, offset, p);
930 if (pages == -1) {
931 /* Make sure the first page is sent out before other pages */
932 bytes_xmit = save_page_header(rs, rs->f, block, offset |
933 RAM_SAVE_FLAG_COMPRESS_PAGE);
934 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
935 migrate_compress_level());
936 if (blen > 0) {
937 ram_counters.transferred += bytes_xmit + blen;
938 ram_counters.normal++;
939 pages = 1;
940 } else {
941 qemu_file_set_error(rs->f, blen);
942 error_report("compressed data failed!");
945 if (pages > 0) {
946 ram_release_pages(block->idstr, offset, pages);
948 } else {
949 pages = save_zero_page(rs, block, offset, p);
950 if (pages == -1) {
951 pages = compress_page_with_multi_thread(rs, block, offset);
952 } else {
953 ram_release_pages(block->idstr, offset, pages);
958 return pages;
962 * find_dirty_block: find the next dirty page and update any state
963 * associated with the search process.
 965 * Returns true if a page is found
967 * @rs: current RAM state
968 * @pss: data about the state of the current dirty page scan
969 * @again: set to false if the search has scanned the whole of RAM
971 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
973 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
974 if (pss->complete_round && pss->block == rs->last_seen_block &&
975 pss->page >= rs->last_page) {
977 * We've been once around the RAM and haven't found anything.
978 * Give up.
980 *again = false;
981 return false;
983 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
984 /* Didn't find anything in this RAM Block */
985 pss->page = 0;
986 pss->block = QLIST_NEXT_RCU(pss->block, next);
987 if (!pss->block) {
988 /* Hit the end of the list */
989 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
990 /* Flag that we've looped */
991 pss->complete_round = true;
992 rs->ram_bulk_stage = false;
993 if (migrate_use_xbzrle()) {
994 /* If xbzrle is on, stop using the data compression at this
995 * point. In theory, xbzrle can do better than compression.
997 flush_compressed_data(rs);
1000 /* Didn't find anything this time, but try again on the new block */
1001 *again = true;
1002 return false;
1003 } else {
1004 /* Can go around again, but... */
1005 *again = true;
1006 /* We've found something so probably don't need to */
1007 return true;
 1012 * unqueue_page: gets a page off the queue
1014 * Helper for 'get_queued_page' - gets a page off the queue
1016 * Returns the block of the page (or NULL if none available)
1018 * @rs: current RAM state
1019 * @offset: used to return the offset within the RAMBlock
1021 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1023 RAMBlock *block = NULL;
1025 qemu_mutex_lock(&rs->src_page_req_mutex);
1026 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1027 struct RAMSrcPageRequest *entry =
1028 QSIMPLEQ_FIRST(&rs->src_page_requests);
1029 block = entry->rb;
1030 *offset = entry->offset;
1032 if (entry->len > TARGET_PAGE_SIZE) {
1033 entry->len -= TARGET_PAGE_SIZE;
1034 entry->offset += TARGET_PAGE_SIZE;
1035 } else {
1036 memory_region_unref(block->mr);
1037 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1038 g_free(entry);
1041 qemu_mutex_unlock(&rs->src_page_req_mutex);
1043 return block;
 1047 * get_queued_page: unqueue a page from the postcopy requests
1049 * Skips pages that are already sent (!dirty)
 1051 * Returns true if a queued page is found
1053 * @rs: current RAM state
1054 * @pss: data about the state of the current dirty page scan
1056 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1058 RAMBlock *block;
1059 ram_addr_t offset;
1060 bool dirty;
1062 do {
1063 block = unqueue_page(rs, &offset);
1065 * We're sending this page, and since it's postcopy nothing else
1066 * will dirty it, and we must make sure it doesn't get sent again
1067 * even if this queue request was received after the background
1068 * search already sent it.
1070 if (block) {
1071 unsigned long page;
1073 page = offset >> TARGET_PAGE_BITS;
1074 dirty = test_bit(page, block->bmap);
1075 if (!dirty) {
1076 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1077 page, test_bit(page, block->unsentmap));
1078 } else {
1079 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1083 } while (block && !dirty);
1085 if (block) {
 1087 * As soon as we start servicing pages out of order, we have
 1088 * to kill the bulk stage, since the bulk stage assumes
 1089 * (in migration_bitmap_find_dirty) that every page is
 1090 * dirty, and that's no longer true.
1092 rs->ram_bulk_stage = false;
1095 * We want the background search to continue from the queued page
1096 * since the guest is likely to want other pages near to the page
1097 * it just requested.
1099 pss->block = block;
1100 pss->page = offset >> TARGET_PAGE_BITS;
1103 return !!block;
1107 * migration_page_queue_free: drop any remaining pages in the ram
1108 * request queue
 1110 * It should be empty at the end anyway, but in error cases there may
 1111 * be some left; any pages that are left over are dropped here.
1114 static void migration_page_queue_free(RAMState *rs)
1116 struct RAMSrcPageRequest *mspr, *next_mspr;
 1117 /* This queue generally should be empty - but in the case of a failed
 1118 * migration it might have some entries left over.
1120 rcu_read_lock();
1121 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1122 memory_region_unref(mspr->rb->mr);
1123 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1124 g_free(mspr);
1126 rcu_read_unlock();
1130 * ram_save_queue_pages: queue the page for transmission
1132 * A request from postcopy destination for example.
1134 * Returns zero on success or negative on error
 1136 * @rbname: Name of the RAMBlock of the request. NULL means the
 1137 * same as the last one.
1138 * @start: starting address from the start of the RAMBlock
1139 * @len: length (in bytes) to send
1141 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1143 RAMBlock *ramblock;
1144 RAMState *rs = ram_state;
1146 ram_counters.postcopy_requests++;
1147 rcu_read_lock();
1148 if (!rbname) {
1149 /* Reuse last RAMBlock */
1150 ramblock = rs->last_req_rb;
1152 if (!ramblock) {
1154 * Shouldn't happen, we can't reuse the last RAMBlock if
1155 * it's the 1st request.
1157 error_report("ram_save_queue_pages no previous block");
1158 goto err;
1160 } else {
1161 ramblock = qemu_ram_block_by_name(rbname);
1163 if (!ramblock) {
1164 /* We shouldn't be asked for a non-existent RAMBlock */
1165 error_report("ram_save_queue_pages no block '%s'", rbname);
1166 goto err;
1168 rs->last_req_rb = ramblock;
1170 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1171 if (start+len > ramblock->used_length) {
1172 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1173 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1174 __func__, start, len, ramblock->used_length);
1175 goto err;
1178 struct RAMSrcPageRequest *new_entry =
1179 g_malloc0(sizeof(struct RAMSrcPageRequest));
1180 new_entry->rb = ramblock;
1181 new_entry->offset = start;
1182 new_entry->len = len;
1184 memory_region_ref(ramblock->mr);
1185 qemu_mutex_lock(&rs->src_page_req_mutex);
1186 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1187 qemu_mutex_unlock(&rs->src_page_req_mutex);
1188 rcu_read_unlock();
1190 return 0;
1192 err:
1193 rcu_read_unlock();
1194 return -1;
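/*
 * Typical usage (a sketch): when the postcopy destination faults on a
 * page it sends a request back on the return path, which ends up here
 * as something like
 *     ram_save_queue_pages("pc.ram", 0x200000, 4096);
 * i.e. a chunk of a named RAMBlock, normally host-page sized and
 * aligned.  The block name "pc.ram" and the addresses are only
 * illustrative.
 */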
1198 * ram_save_target_page: save one target page
1200 * Returns the number of pages written
1202 * @rs: current RAM state
1203 * @ms: current migration state
1204 * @pss: data about the page we want to send
1205 * @last_stage: if we are at the completion stage
1207 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1208 bool last_stage)
1210 int res = 0;
 1212 /* Check if the page is dirty and if so, send it */
1213 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1215 * If xbzrle is on, stop using the data compression after first
1216 * round of migration even if compression is enabled. In theory,
1217 * xbzrle can do better than compression.
1219 if (migrate_use_compression() &&
1220 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1221 res = ram_save_compressed_page(rs, pss, last_stage);
1222 } else {
1223 res = ram_save_page(rs, pss, last_stage);
1226 if (res < 0) {
1227 return res;
1229 if (pss->block->unsentmap) {
1230 clear_bit(pss->page, pss->block->unsentmap);
1234 return res;
1238 * ram_save_host_page: save a whole host page
1240 * Starting at *offset send pages up to the end of the current host
1241 * page. It's valid for the initial offset to point into the middle of
1242 * a host page in which case the remainder of the hostpage is sent.
1243 * Only dirty target pages are sent. Note that the host page size may
1244 * be a huge page for this block.
1245 * The saving stops at the boundary of the used_length of the block
1246 * if the RAMBlock isn't a multiple of the host page size.
1248 * Returns the number of pages written or negative on error
1250 * @rs: current RAM state
1251 * @ms: current migration state
1252 * @pss: data about the page we want to send
1253 * @last_stage: if we are at the completion stage
1255 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1256 bool last_stage)
1258 int tmppages, pages = 0;
1259 size_t pagesize_bits =
1260 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1262 do {
1263 tmppages = ram_save_target_page(rs, pss, last_stage);
1264 if (tmppages < 0) {
1265 return tmppages;
1268 pages += tmppages;
1269 pss->page++;
1270 } while ((pss->page & (pagesize_bits - 1)) &&
1271 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
1273 /* The offset we leave with is the last one we looked at */
1274 pss->page--;
1275 return pages;
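/*
 * Example: for a RAMBlock backed by 2 MiB huge pages with a 4 KiB
 * target page size, pagesize_bits is 512, so a single call walks up to
 * 512 consecutive target pages (sending only the dirty ones) and then
 * leaves pss->page pointing at the last page it looked at.
 */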
1279 * ram_find_and_save_block: finds a dirty page and sends it to f
1281 * Called within an RCU critical section.
1283 * Returns the number of pages written where zero means no dirty pages
1285 * @rs: current RAM state
1286 * @last_stage: if we are at the completion stage
1288 * On systems where host-page-size > target-page-size it will send all the
1289 * pages in a host page that are dirty.
1292 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1294 PageSearchStatus pss;
1295 int pages = 0;
1296 bool again, found;
1298 /* No dirty page as there is zero RAM */
1299 if (!ram_bytes_total()) {
1300 return pages;
1303 pss.block = rs->last_seen_block;
1304 pss.page = rs->last_page;
1305 pss.complete_round = false;
1307 if (!pss.block) {
1308 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1311 do {
1312 again = true;
1313 found = get_queued_page(rs, &pss);
1315 if (!found) {
1316 /* priority queue empty, so just search for something dirty */
1317 found = find_dirty_block(rs, &pss, &again);
1320 if (found) {
1321 pages = ram_save_host_page(rs, &pss, last_stage);
1323 } while (!pages && again);
1325 rs->last_seen_block = pss.block;
1326 rs->last_page = pss.page;
1328 return pages;
1331 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1333 uint64_t pages = size / TARGET_PAGE_SIZE;
1335 if (zero) {
1336 ram_counters.duplicate += pages;
1337 } else {
1338 ram_counters.normal += pages;
1339 ram_counters.transferred += size;
1340 qemu_update_position(f, size);
1344 uint64_t ram_bytes_total(void)
1346 RAMBlock *block;
1347 uint64_t total = 0;
1349 rcu_read_lock();
1350 RAMBLOCK_FOREACH(block) {
1351 total += block->used_length;
1353 rcu_read_unlock();
1354 return total;
1357 void free_xbzrle_decoded_buf(void)
1359 g_free(xbzrle_decoded_buf);
1360 xbzrle_decoded_buf = NULL;
1363 static void ram_migration_cleanup(void *opaque)
1365 RAMState **rsp = opaque;
1366 RAMBlock *block;
 1368 /* the caller must hold the iothread lock or be in a bh, so there is
 1369 * no write race against this migration bitmap */
1371 memory_global_dirty_log_stop();
1373 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1374 g_free(block->bmap);
1375 block->bmap = NULL;
1376 g_free(block->unsentmap);
1377 block->unsentmap = NULL;
1380 XBZRLE_cache_lock();
1381 if (XBZRLE.cache) {
1382 cache_fini(XBZRLE.cache);
1383 g_free(XBZRLE.encoded_buf);
1384 g_free(XBZRLE.current_buf);
1385 g_free(XBZRLE.zero_target_page);
1386 XBZRLE.cache = NULL;
1387 XBZRLE.encoded_buf = NULL;
1388 XBZRLE.current_buf = NULL;
1389 XBZRLE.zero_target_page = NULL;
1391 XBZRLE_cache_unlock();
1392 migration_page_queue_free(*rsp);
1393 g_free(*rsp);
1394 *rsp = NULL;
1397 static void ram_state_reset(RAMState *rs)
1399 rs->last_seen_block = NULL;
1400 rs->last_sent_block = NULL;
1401 rs->last_page = 0;
1402 rs->last_version = ram_list.version;
1403 rs->ram_bulk_stage = true;
1406 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1409 * 'expected' is the value you expect the bitmap mostly to be full
1410 * of; it won't bother printing lines that are all this value.
1411 * If 'todump' is null the migration bitmap is dumped.
1413 void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1414 unsigned long pages)
1416 int64_t cur;
1417 int64_t linelen = 128;
1418 char linebuf[129];
1420 for (cur = 0; cur < pages; cur += linelen) {
1421 int64_t curb;
1422 bool found = false;
1424 * Last line; catch the case where the line length
1425 * is longer than remaining ram
1427 if (cur + linelen > pages) {
1428 linelen = pages - cur;
1430 for (curb = 0; curb < linelen; curb++) {
1431 bool thisbit = test_bit(cur + curb, todump);
1432 linebuf[curb] = thisbit ? '1' : '.';
1433 found = found || (thisbit != expected);
1435 if (found) {
1436 linebuf[curb] = '\0';
1437 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1442 /* **** functions for postcopy ***** */
1444 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1446 struct RAMBlock *block;
1448 RAMBLOCK_FOREACH(block) {
1449 unsigned long *bitmap = block->bmap;
1450 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1451 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
1453 while (run_start < range) {
1454 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1455 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
1456 (run_end - run_start) << TARGET_PAGE_BITS);
1457 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1463 * postcopy_send_discard_bm_ram: discard a RAMBlock
1465 * Returns zero on success
1467 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1468 * Note: At this point the 'unsentmap' is the processed bitmap combined
1469 * with the dirtymap; so a '1' means it's either dirty or unsent.
1471 * @ms: current migration state
1472 * @pds: state for postcopy
1473 * @start: RAMBlock starting page
1474 * @length: RAMBlock size
1476 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1477 PostcopyDiscardState *pds,
1478 RAMBlock *block)
1480 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
1481 unsigned long current;
1482 unsigned long *unsentmap = block->unsentmap;
1484 for (current = 0; current < end; ) {
1485 unsigned long one = find_next_bit(unsentmap, end, current);
1487 if (one <= end) {
1488 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1489 unsigned long discard_length;
1491 if (zero >= end) {
1492 discard_length = end - one;
1493 } else {
1494 discard_length = zero - one;
1496 if (discard_length) {
1497 postcopy_discard_send_range(ms, pds, one, discard_length);
1499 current = one + discard_length;
1500 } else {
1501 current = one;
1505 return 0;
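/*
 * Example: if pages 3-4 and 7-9 of a block are still set in the
 * unsentmap, the loop above emits
 *     postcopy_discard_send_range(ms, pds, 3, 2);
 *     postcopy_discard_send_range(ms, pds, 7, 3);
 * i.e. one range per run of consecutive '1' bits.
 */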
1509 * postcopy_each_ram_send_discard: discard all RAMBlocks
1511 * Returns 0 for success or negative for error
1513 * Utility for the outgoing postcopy code.
1514 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1515 * passing it bitmap indexes and name.
1516 * (qemu_ram_foreach_block ends up passing unscaled lengths
1517 * which would mean postcopy code would have to deal with target page)
1519 * @ms: current migration state
1521 static int postcopy_each_ram_send_discard(MigrationState *ms)
1523 struct RAMBlock *block;
1524 int ret;
1526 RAMBLOCK_FOREACH(block) {
1527 PostcopyDiscardState *pds =
1528 postcopy_discard_send_init(ms, block->idstr);
1531 * Postcopy sends chunks of bitmap over the wire, but it
1532 * just needs indexes at this point, avoids it having
1533 * target page specific code.
1535 ret = postcopy_send_discard_bm_ram(ms, pds, block);
1536 postcopy_discard_send_finish(ms, pds);
1537 if (ret) {
1538 return ret;
1542 return 0;
 1546 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1548 * Helper for postcopy_chunk_hostpages; it's called twice to
1549 * canonicalize the two bitmaps, that are similar, but one is
1550 * inverted.
1552 * Postcopy requires that all target pages in a hostpage are dirty or
1553 * clean, not a mix. This function canonicalizes the bitmaps.
1555 * @ms: current migration state
1556 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1557 * otherwise we need to canonicalize partially dirty host pages
1558 * @block: block that contains the page we want to canonicalize
1559 * @pds: state for postcopy
1561 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1562 RAMBlock *block,
1563 PostcopyDiscardState *pds)
1565 RAMState *rs = ram_state;
1566 unsigned long *bitmap = block->bmap;
1567 unsigned long *unsentmap = block->unsentmap;
1568 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1569 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1570 unsigned long run_start;
1572 if (block->page_size == TARGET_PAGE_SIZE) {
1573 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1574 return;
1577 if (unsent_pass) {
1578 /* Find a sent page */
1579 run_start = find_next_zero_bit(unsentmap, pages, 0);
1580 } else {
1581 /* Find a dirty page */
1582 run_start = find_next_bit(bitmap, pages, 0);
1585 while (run_start < pages) {
1586 bool do_fixup = false;
1587 unsigned long fixup_start_addr;
1588 unsigned long host_offset;
1591 * If the start of this run of pages is in the middle of a host
1592 * page, then we need to fixup this host page.
1594 host_offset = run_start % host_ratio;
1595 if (host_offset) {
1596 do_fixup = true;
1597 run_start -= host_offset;
1598 fixup_start_addr = run_start;
1599 /* For the next pass */
1600 run_start = run_start + host_ratio;
1601 } else {
1602 /* Find the end of this run */
1603 unsigned long run_end;
1604 if (unsent_pass) {
1605 run_end = find_next_bit(unsentmap, pages, run_start + 1);
1606 } else {
1607 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
1610 * If the end isn't at the start of a host page, then the
1611 * run doesn't finish at the end of a host page
1612 * and we need to discard.
1614 host_offset = run_end % host_ratio;
1615 if (host_offset) {
1616 do_fixup = true;
1617 fixup_start_addr = run_end - host_offset;
1619 * This host page has gone, the next loop iteration starts
1620 * from after the fixup
1622 run_start = fixup_start_addr + host_ratio;
1623 } else {
1625 * No discards on this iteration, next loop starts from
1626 * next sent/dirty page
1628 run_start = run_end + 1;
1632 if (do_fixup) {
1633 unsigned long page;
1635 /* Tell the destination to discard this page */
1636 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1637 /* For the unsent_pass we:
1638 * discard partially sent pages
1639 * For the !unsent_pass (dirty) we:
1640 * discard partially dirty pages that were sent
1641 * (any partially sent pages were already discarded
1642 * by the previous unsent_pass)
1644 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1645 host_ratio);
1648 /* Clean up the bitmap */
1649 for (page = fixup_start_addr;
1650 page < fixup_start_addr + host_ratio; page++) {
1651 /* All pages in this host page are now not sent */
1652 set_bit(page, unsentmap);
1655 * Remark them as dirty, updating the count for any pages
1656 * that weren't previously dirty.
1658 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
1662 if (unsent_pass) {
1663 /* Find the next sent page for the next iteration */
1664 run_start = find_next_zero_bit(unsentmap, pages, run_start);
1665 } else {
1666 /* Find the next dirty page for the next iteration */
1667 run_start = find_next_bit(bitmap, pages, run_start);
 1673 * postcopy_chunk_hostpages: discard any partially sent host page
1675 * Utility for the outgoing postcopy code.
1677 * Discard any partially sent host-page size chunks, mark any partially
1678 * dirty host-page size chunks as all dirty. In this case the host-page
1679 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1681 * Returns zero on success
1683 * @ms: current migration state
1684 * @block: block we want to work with
1686 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
1688 PostcopyDiscardState *pds =
1689 postcopy_discard_send_init(ms, block->idstr);
1691 /* First pass: Discard all partially sent host pages */
1692 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1694 * Second pass: Ensure that all partially dirty host pages are made
1695 * fully dirty.
1697 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1699 postcopy_discard_send_finish(ms, pds);
1700 return 0;
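/*
 * Example: with 2 MiB host pages and 4 KiB target pages (host_ratio of
 * 512), if only part of a host page was sent during precopy, the two
 * passes above discard that whole host page on the destination and
 * re-mark all 512 of its target pages as unsent and dirty, so that
 * postcopy later re-sends the host page as a single unit.
 */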
1704 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1706 * Returns zero on success
 1708 * Transmit the set of pages to be discarded after precopy to the target;
 1709 * these are pages that:
1710 * a) Have been previously transmitted but are now dirty again
1711 * b) Pages that have never been transmitted, this ensures that
1712 * any pages on the destination that have been mapped by background
1713 * tasks get discarded (transparent huge pages is the specific concern)
1714 * Hopefully this is pretty sparse
1716 * @ms: current migration state
1718 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1720 RAMState *rs = ram_state;
1721 RAMBlock *block;
1722 int ret;
1724 rcu_read_lock();
1726 /* This should be our last sync, the src is now paused */
1727 migration_bitmap_sync(rs);
1729 /* Easiest way to make sure we don't resume in the middle of a host-page */
1730 rs->last_seen_block = NULL;
1731 rs->last_sent_block = NULL;
1732 rs->last_page = 0;
1734 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1735 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1736 unsigned long *bitmap = block->bmap;
1737 unsigned long *unsentmap = block->unsentmap;
1739 if (!unsentmap) {
1740 /* We don't have a safe way to resize the sentmap, so
1741 * if the bitmap was resized it will be NULL at this
1742 * point.
1744 error_report("migration ram resized during precopy phase");
1745 rcu_read_unlock();
1746 return -EINVAL;
1748 /* Deal with TPS != HPS and huge pages */
1749 ret = postcopy_chunk_hostpages(ms, block);
1750 if (ret) {
1751 rcu_read_unlock();
1752 return ret;
1756 * Update the unsentmap to be unsentmap = unsentmap | dirty
1758 bitmap_or(unsentmap, unsentmap, bitmap, pages);
1759 #ifdef DEBUG_POSTCOPY
1760 ram_debug_dump_bitmap(unsentmap, true, pages);
1761 #endif
1763 trace_ram_postcopy_send_discard_bitmap();
1765 ret = postcopy_each_ram_send_discard(ms);
1766 rcu_read_unlock();
1768 return ret;
1772 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1774 * Returns zero on success
1776 * @rbname: name of the RAMBlock of the request. NULL means the
 1777 * same as the last one.
1778 * @start: RAMBlock starting page
1779 * @length: RAMBlock size
1781 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
1783 int ret = -1;
1785 trace_ram_discard_range(rbname, start, length);
1787 rcu_read_lock();
1788 RAMBlock *rb = qemu_ram_block_by_name(rbname);
1790 if (!rb) {
1791 error_report("ram_discard_range: Failed to find block '%s'", rbname);
1792 goto err;
1795 ret = ram_block_discard_range(rb, start, length);
1797 err:
1798 rcu_read_unlock();
1800 return ret;
1803 static int ram_state_init(RAMState **rsp)
1805 *rsp = g_new0(RAMState, 1);
1807 qemu_mutex_init(&(*rsp)->bitmap_mutex);
1808 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
1809 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
1811 if (migrate_use_xbzrle()) {
1812 XBZRLE_cache_lock();
1813 XBZRLE.zero_target_page = g_malloc0(TARGET_PAGE_SIZE);
1814 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1815 TARGET_PAGE_SIZE,
1816 TARGET_PAGE_SIZE);
1817 if (!XBZRLE.cache) {
1818 XBZRLE_cache_unlock();
1819 error_report("Error creating cache");
1820 g_free(*rsp);
1821 *rsp = NULL;
1822 return -1;
1824 XBZRLE_cache_unlock();
1826 /* We prefer not to abort if there is no memory */
1827 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1828 if (!XBZRLE.encoded_buf) {
1829 error_report("Error allocating encoded_buf");
1830 g_free(*rsp);
1831 *rsp = NULL;
1832 return -1;
1835 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1836 if (!XBZRLE.current_buf) {
1837 error_report("Error allocating current_buf");
1838 g_free(XBZRLE.encoded_buf);
1839 XBZRLE.encoded_buf = NULL;
1840 g_free(*rsp);
1841 *rsp = NULL;
1842 return -1;
1846 /* For memory_global_dirty_log_start below. */
1847 qemu_mutex_lock_iothread();
1849 qemu_mutex_lock_ramlist();
1850 rcu_read_lock();
1851 ram_state_reset(*rsp);
1853 /* Skip setting bitmap if there is no RAM */
1854 if (ram_bytes_total()) {
1855 RAMBlock *block;
1857 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1858 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
1860 block->bmap = bitmap_new(pages);
1861 bitmap_set(block->bmap, 0, pages);
1862 if (migrate_postcopy_ram()) {
1863 block->unsentmap = bitmap_new(pages);
1864 bitmap_set(block->unsentmap, 0, pages);
1870 * Count the total number of pages used by ram blocks not including any
1871 * gaps due to alignment or unplugs.
1873 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1875 memory_global_dirty_log_start();
1876 migration_bitmap_sync(*rsp);
1877 qemu_mutex_unlock_ramlist();
1878 qemu_mutex_unlock_iothread();
1879 rcu_read_unlock();
1881 return 0;
1885 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 1886 * a long-running RCU critical section. When RCU reclaims in the code
1887 * start to become numerous it will be necessary to reduce the
1888 * granularity of these critical sections.
1892 * ram_save_setup: Setup RAM for migration
1894 * Returns zero to indicate success and negative for error
1896 * @f: QEMUFile where to send the data
1897 * @opaque: RAMState pointer
1899 static int ram_save_setup(QEMUFile *f, void *opaque)
1901 RAMState **rsp = opaque;
1902 RAMBlock *block;
 1904 /* migration has already set up the bitmap, reuse it. */
1905 if (!migration_in_colo_state()) {
1906 if (ram_state_init(rsp) != 0) {
1907 return -1;
1910 (*rsp)->f = f;
1912 rcu_read_lock();
1914 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1916 RAMBLOCK_FOREACH(block) {
1917 qemu_put_byte(f, strlen(block->idstr));
1918 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1919 qemu_put_be64(f, block->used_length);
1920 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
1921 qemu_put_be64(f, block->page_size);
1925 rcu_read_unlock();
1927 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1928 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1930 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1932 return 0;
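/*
 * Stream layout produced by the setup stage (sketch):
 *     be64: total ram size | RAM_SAVE_FLAG_MEM_SIZE
 *     for each RAMBlock:
 *         u8:    strlen(idstr)
 *         bytes: idstr
 *         be64:  used_length
 *         be64:  page_size   (only when postcopy is enabled and the
 *                             block's page size differs from the host's)
 *     be64: RAM_SAVE_FLAG_EOS
 * plus whatever the RAM_CONTROL_SETUP hooks emit for RDMA.
 */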
1936 * ram_save_iterate: iterative stage for migration
1938 * Returns zero to indicate success and negative for error
1940 * @f: QEMUFile where to send the data
1941 * @opaque: RAMState pointer
1943 static int ram_save_iterate(QEMUFile *f, void *opaque)
1945 RAMState **temp = opaque;
1946 RAMState *rs = *temp;
1947 int ret;
1948 int i;
1949 int64_t t0;
1950 int done = 0;
1952 rcu_read_lock();
1953 if (ram_list.version != rs->last_version) {
1954 ram_state_reset(rs);
1957 /* Read version before ram_list.blocks */
1958 smp_rmb();
1960 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
1962 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1963 i = 0;
1964 while ((ret = qemu_file_rate_limit(f)) == 0) {
1965 int pages;
1967 pages = ram_find_and_save_block(rs, false);
 1968 /* no more pages to send */
1969 if (pages == 0) {
1970 done = 1;
1971 break;
1973 rs->iterations++;
 1975 /* we want to check in the 1st loop, just in case it was the 1st time
 1976 and we had to sync the dirty bitmap.
 1977 qemu_clock_get_ns() is a bit expensive, so we only check every few
 1978 iterations
1980 if ((i & 63) == 0) {
1981 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
1982 if (t1 > MAX_WAIT) {
1983 trace_ram_save_iterate_big_wait(t1, i);
1984 break;
1987 i++;
1989 flush_compressed_data(rs);
1990 rcu_read_unlock();
1993 * Must occur before EOS (or any QEMUFile operation)
1994 * because of RDMA protocol.
1996 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
1998 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1999 ram_counters.transferred += 8;
2001 ret = qemu_file_get_error(f);
2002 if (ret < 0) {
2003 return ret;
2006 return done;
2010 * ram_save_complete: function called to send the remaining amount of ram
2012 * Returns zero to indicate success
2014 * Called with iothread lock
2016 * @f: QEMUFile where to send the data
2017 * @opaque: RAMState pointer
2019 static int ram_save_complete(QEMUFile *f, void *opaque)
2021 RAMState **temp = opaque;
2022 RAMState *rs = *temp;
2024 rcu_read_lock();
2026 if (!migration_in_postcopy()) {
2027 migration_bitmap_sync(rs);
2030 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2032 /* try transferring iterative blocks of memory */
2034 /* flush all remaining blocks regardless of rate limiting */
2035 while (true) {
2036 int pages;
2038 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
 2040 /* no more blocks to send */
2040 if (pages == 0) {
2041 break;
2045 flush_compressed_data(rs);
2046 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2048 rcu_read_unlock();
2050 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2052 return 0;
2055 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2056 uint64_t *non_postcopiable_pending,
2057 uint64_t *postcopiable_pending)
2059 RAMState **temp = opaque;
2060 RAMState *rs = *temp;
2061 uint64_t remaining_size;
2063 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2065 if (!migration_in_postcopy() &&
2066 remaining_size < max_size) {
2067 qemu_mutex_lock_iothread();
2068 rcu_read_lock();
2069 migration_bitmap_sync(rs);
2070 rcu_read_unlock();
2071 qemu_mutex_unlock_iothread();
2072 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2075 /* We can do postcopy, and all the data is postcopiable */
2076 *postcopiable_pending += remaining_size;
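/*
 * The pending estimate above is simply migration_dirty_pages scaled by
 * TARGET_PAGE_SIZE.  When that estimate drops below max_size (and we are
 * not yet in postcopy), the dirty bitmap is re-synced under the iothread
 * lock so the caller can decide on fresh data.  All of RAM is reported
 * as postcopiable pending.
 */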
2079 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2081 unsigned int xh_len;
2082 int xh_flags;
2083 uint8_t *loaded_data;
2085 if (!xbzrle_decoded_buf) {
2086 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2088 loaded_data = xbzrle_decoded_buf;
2090 /* extract RLE header */
2091 xh_flags = qemu_get_byte(f);
2092 xh_len = qemu_get_be16(f);
2094 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2095 error_report("Failed to load XBZRLE page - wrong compression!");
2096 return -1;
2099 if (xh_len > TARGET_PAGE_SIZE) {
2100 error_report("Failed to load XBZRLE page - len overflow!");
2101 return -1;
2103 /* load data and decode */
2104 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2106 /* decode RLE */
2107 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2108 TARGET_PAGE_SIZE) == -1) {
2109 error_report("Failed to load XBZRLE page - decode error!");
2110 return -1;
2113 return 0;
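/*
 * Layout of an XBZRLE page record as parsed above:
 *
 *   byte:  ENCODING_FLAG_XBZRLE
 *   be16:  length of the encoded data (must be <= TARGET_PAGE_SIZE)
 *   bytes: XBZRLE-encoded delta, applied against the current contents
 *          of @host by xbzrle_decode_buffer()
 */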
2117 * ram_block_from_stream: read a RAMBlock id from the migration stream
2119 * Must be called from within an RCU critical section.
2121 * Returns a pointer from within the RCU-protected ram_list.
2123 * @f: QEMUFile where to read the data from
2124 * @flags: Page flags (mostly to see if it's a continuation of the previous block)
2126 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2128 static RAMBlock *block = NULL;
2129 char id[256];
2130 uint8_t len;
2132 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2133 if (!block) {
2134 error_report("Ack, bad migration stream!");
2135 return NULL;
2137 return block;
2140 len = qemu_get_byte(f);
2141 qemu_get_buffer(f, (uint8_t *)id, len);
2142 id[len] = 0;
2144 block = qemu_ram_block_by_name(id);
2145 if (!block) {
2146 error_report("Can't find block %s", id);
2147 return NULL;
2150 return block;
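/*
 * The static 'block' pointer above caches the RAMBlock named by the most
 * recent page header: the source sends the idstr only for the first page
 * of a run within a block, and marks subsequent pages with
 * RAM_SAVE_FLAG_CONTINUE so the cached block can be reused instead of
 * resending the name.
 */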
2153 static inline void *host_from_ram_block_offset(RAMBlock *block,
2154 ram_addr_t offset)
2156 if (!offset_in_ramblock(block, offset)) {
2157 return NULL;
2160 return block->host + offset;
2164 * ram_handle_compressed: handle the zero page case
2166 * If a page (or a whole RDMA chunk) has been
2167 * determined to be zero, then zap it.
2169 * @host: host address for the zero page
2170 * @ch: the byte the page is filled with; only zero is supported
2171 * @size: size of the zero page
2173 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2175 if (ch != 0 || !is_zero_range(host, size)) {
2176 memset(host, ch, size);
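/*
 * The is_zero_range() test above avoids writing to pages that are already
 * zero, so untouched (and possibly still unallocated) destination pages
 * are not dirtied just to fill them with zeros.
 */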
2180 static void *do_data_decompress(void *opaque)
2182 DecompressParam *param = opaque;
2183 unsigned long pagesize;
2184 uint8_t *des;
2185 int len;
2187 qemu_mutex_lock(&param->mutex);
2188 while (!param->quit) {
2189 if (param->des) {
2190 des = param->des;
2191 len = param->len;
2192 param->des = 0;
2193 qemu_mutex_unlock(&param->mutex);
2195 pagesize = TARGET_PAGE_SIZE;
2196 /* uncompress() can fail in some cases, especially
2197 * when the page is dirtied while it is being compressed; that's
2198 * not a problem because the dirty page will be retransferred
2199 * and uncompress() won't break the data in other pages.
2201 uncompress((Bytef *)des, &pagesize,
2202 (const Bytef *)param->compbuf, len);
2204 qemu_mutex_lock(&decomp_done_lock);
2205 param->done = true;
2206 qemu_cond_signal(&decomp_done_cond);
2207 qemu_mutex_unlock(&decomp_done_lock);
2209 qemu_mutex_lock(&param->mutex);
2210 } else {
2211 qemu_cond_wait(&param->cond, &param->mutex);
2214 qemu_mutex_unlock(&param->mutex);
2216 return NULL;
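/*
 * Each decompression worker above sleeps on its own condition variable
 * until decompress_data_with_multi_threads() hands it a job (des,
 * compbuf, len).  It inflates the buffer with zlib's uncompress()
 * directly into guest RAM, then marks itself 'done' and signals
 * decomp_done_cond so a waiting dispatcher can reuse it.
 */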
2219 static void wait_for_decompress_done(void)
2221 int idx, thread_count;
2223 if (!migrate_use_compression()) {
2224 return;
2227 thread_count = migrate_decompress_threads();
2228 qemu_mutex_lock(&decomp_done_lock);
2229 for (idx = 0; idx < thread_count; idx++) {
2230 while (!decomp_param[idx].done) {
2231 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2234 qemu_mutex_unlock(&decomp_done_lock);
2237 void migrate_decompress_threads_create(void)
2239 int i, thread_count;
2241 thread_count = migrate_decompress_threads();
2242 decompress_threads = g_new0(QemuThread, thread_count);
2243 decomp_param = g_new0(DecompressParam, thread_count);
2244 qemu_mutex_init(&decomp_done_lock);
2245 qemu_cond_init(&decomp_done_cond);
2246 for (i = 0; i < thread_count; i++) {
2247 qemu_mutex_init(&decomp_param[i].mutex);
2248 qemu_cond_init(&decomp_param[i].cond);
2249 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2250 decomp_param[i].done = true;
2251 decomp_param[i].quit = false;
2252 qemu_thread_create(decompress_threads + i, "decompress",
2253 do_data_decompress, decomp_param + i,
2254 QEMU_THREAD_JOINABLE);
2258 void migrate_decompress_threads_join(void)
2260 int i, thread_count;
2262 thread_count = migrate_decompress_threads();
2263 for (i = 0; i < thread_count; i++) {
2264 qemu_mutex_lock(&decomp_param[i].mutex);
2265 decomp_param[i].quit = true;
2266 qemu_cond_signal(&decomp_param[i].cond);
2267 qemu_mutex_unlock(&decomp_param[i].mutex);
2269 for (i = 0; i < thread_count; i++) {
2270 qemu_thread_join(decompress_threads + i);
2271 qemu_mutex_destroy(&decomp_param[i].mutex);
2272 qemu_cond_destroy(&decomp_param[i].cond);
2273 g_free(decomp_param[i].compbuf);
2275 g_free(decompress_threads);
2276 g_free(decomp_param);
2277 decompress_threads = NULL;
2278 decomp_param = NULL;
2281 static void decompress_data_with_multi_threads(QEMUFile *f,
2282 void *host, int len)
2284 int idx, thread_count;
2286 thread_count = migrate_decompress_threads();
2287 qemu_mutex_lock(&decomp_done_lock);
2288 while (true) {
2289 for (idx = 0; idx < thread_count; idx++) {
2290 if (decomp_param[idx].done) {
2291 decomp_param[idx].done = false;
2292 qemu_mutex_lock(&decomp_param[idx].mutex);
2293 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2294 decomp_param[idx].des = host;
2295 decomp_param[idx].len = len;
2296 qemu_cond_signal(&decomp_param[idx].cond);
2297 qemu_mutex_unlock(&decomp_param[idx].mutex);
2298 break;
2301 if (idx < thread_count) {
2302 break;
2303 } else {
2304 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2307 qemu_mutex_unlock(&decomp_done_lock);
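/*
 * The dispatcher above scans for an idle worker (done == true), copies
 * the compressed page out of the migration stream into that worker's
 * compbuf and wakes it.  If every worker is busy it blocks on
 * decomp_done_cond until one finishes; the decompression itself happens
 * asynchronously in do_data_decompress().
 */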
2311 * ram_postcopy_incoming_init: allocate postcopy data structures
2313 * Returns 0 for success and negative on error
2315 * @mis: current migration incoming state
2317 * Allocate data structures etc needed by incoming migration with
2318 * postcopy-ram. postcopy-ram's similarly named
2319 * postcopy_ram_incoming_init does the work.
2321 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2323 unsigned long ram_pages = last_ram_page();
2325 return postcopy_ram_incoming_init(mis, ram_pages);
2329 * ram_load_postcopy: load a page in postcopy case
2331 * Returns 0 for success or -errno in case of error
2333 * Called in postcopy mode by ram_load().
2334 * rcu_read_lock is taken prior to this being called.
2336 * @f: QEMUFile to receive the data from
2338 static int ram_load_postcopy(QEMUFile *f)
2340 int flags = 0, ret = 0;
2341 bool place_needed = false;
2342 bool matching_page_sizes = false;
2343 MigrationIncomingState *mis = migration_incoming_get_current();
2344 /* Temporary page that is later 'placed' */
2345 void *postcopy_host_page = postcopy_get_tmp_page(mis);
2346 void *last_host = NULL;
2347 bool all_zero = false;
2349 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2350 ram_addr_t addr;
2351 void *host = NULL;
2352 void *page_buffer = NULL;
2353 void *place_source = NULL;
2354 RAMBlock *block = NULL;
2355 uint8_t ch;
2357 addr = qemu_get_be64(f);
2358 flags = addr & ~TARGET_PAGE_MASK;
2359 addr &= TARGET_PAGE_MASK;
2361 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2362 place_needed = false;
2363 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
2364 block = ram_block_from_stream(f, flags);
2366 host = host_from_ram_block_offset(block, addr);
2367 if (!host) {
2368 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2369 ret = -EINVAL;
2370 break;
2372 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2374 * Postcopy requires that we place whole host pages atomically;
2375 * these may be huge pages for RAMBlocks that are backed by
2376 * hugetlbfs.
2377 * To make it atomic, the data is read into a temporary page
2378 * that's moved into place later.
2379 * The migration protocol uses possibly smaller target pages;
2380 * however, the source ensures it always sends all the components
2381 * of a host page in order.
2383 page_buffer = postcopy_host_page +
2384 ((uintptr_t)host & (block->page_size - 1));
2385 /* If all target pages are zero then we can optimise the placement */
2386 if (!((uintptr_t)host & (block->page_size - 1))) {
2387 all_zero = true;
2388 } else {
2389 /* not the 1st target page within the host page */
2390 if (host != (last_host + TARGET_PAGE_SIZE)) {
2391 error_report("Non-sequential target page %p/%p",
2392 host, last_host);
2393 ret = -EINVAL;
2394 break;
2400 * If it's the last part of a host page then we place the host
2401 * page
2403 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2404 (block->page_size - 1)) == 0;
2405 place_source = postcopy_host_page;
2407 last_host = host;
2409 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2410 case RAM_SAVE_FLAG_ZERO:
2411 ch = qemu_get_byte(f);
2412 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2413 if (ch) {
2414 all_zero = false;
2416 break;
2418 case RAM_SAVE_FLAG_PAGE:
2419 all_zero = false;
2420 if (!place_needed || !matching_page_sizes) {
2421 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2422 } else {
2423 /* Avoid the qemu_file copy during postcopy, since the placement
2424 * code is going to copy the page later anyway; we can only do this
2425 * when the read happens in one go (matching page sizes)
2427 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2428 TARGET_PAGE_SIZE);
2430 break;
2431 case RAM_SAVE_FLAG_EOS:
2432 /* normal exit */
2433 break;
2434 default:
2435 error_report("Unknown combination of migration flags: %#x"
2436 " (postcopy mode)", flags);
2437 ret = -EINVAL;
2440 if (place_needed) {
2441 /* This gets called at the last target page in the host page */
2442 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2444 if (all_zero) {
2445 ret = postcopy_place_page_zero(mis, place_dest,
2446 block->page_size);
2447 } else {
2448 ret = postcopy_place_page(mis, place_dest,
2449 place_source, block->page_size);
2452 if (!ret) {
2453 ret = qemu_file_get_error(f);
2457 return ret;
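/*
 * Summary of the placement logic above: incoming target pages are
 * accumulated in the temporary postcopy_host_page buffer until the last
 * target page of a host page (which may be a hugetlbfs huge page) has
 * arrived, and only then is the whole host page placed atomically with
 * postcopy_place_page() / postcopy_place_page_zero().  Partially filled
 * host pages must never become visible to the guest, since a faulting
 * guest access could otherwise observe a half-loaded page.
 */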
2460 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2462 int flags = 0, ret = 0;
2463 static uint64_t seq_iter;
2464 int len = 0;
2466 * If the system is running in postcopy mode, page inserts into host memory
2467 * must be atomic
2469 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2470 /* ADVISE is earlier; it shows the source has the postcopy capability enabled */
2471 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2473 seq_iter++;
2475 if (version_id != 4) {
2476 ret = -EINVAL;
2479 /* This RCU critical section can be very long running.
2480 * If RCU reclamation in this code becomes frequent,
2481 * it will be necessary to reduce the granularity of this
2482 * critical section.
2484 rcu_read_lock();
2486 if (postcopy_running) {
2487 ret = ram_load_postcopy(f);
2490 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2491 ram_addr_t addr, total_ram_bytes;
2492 void *host = NULL;
2493 uint8_t ch;
2495 addr = qemu_get_be64(f);
2496 flags = addr & ~TARGET_PAGE_MASK;
2497 addr &= TARGET_PAGE_MASK;
2499 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
2500 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2501 RAMBlock *block = ram_block_from_stream(f, flags);
2503 host = host_from_ram_block_offset(block, addr);
2504 if (!host) {
2505 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2506 ret = -EINVAL;
2507 break;
2509 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
2512 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2513 case RAM_SAVE_FLAG_MEM_SIZE:
2514 /* Synchronize RAM block list */
2515 total_ram_bytes = addr;
2516 while (!ret && total_ram_bytes) {
2517 RAMBlock *block;
2518 char id[256];
2519 ram_addr_t length;
2521 len = qemu_get_byte(f);
2522 qemu_get_buffer(f, (uint8_t *)id, len);
2523 id[len] = 0;
2524 length = qemu_get_be64(f);
2526 block = qemu_ram_block_by_name(id);
2527 if (block) {
2528 if (length != block->used_length) {
2529 Error *local_err = NULL;
2531 ret = qemu_ram_resize(block, length,
2532 &local_err);
2533 if (local_err) {
2534 error_report_err(local_err);
2537 /* For postcopy we need to check that hugepage sizes match */
2538 if (postcopy_advised &&
2539 block->page_size != qemu_host_page_size) {
2540 uint64_t remote_page_size = qemu_get_be64(f);
2541 if (remote_page_size != block->page_size) {
2542 error_report("Mismatched RAM page size %s "
2543 "(local) %zd != %" PRId64,
2544 id, block->page_size,
2545 remote_page_size);
2546 ret = -EINVAL;
2549 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2550 block->idstr);
2551 } else {
2552 error_report("Unknown ramblock \"%s\", cannot "
2553 "accept migration", id);
2554 ret = -EINVAL;
2557 total_ram_bytes -= length;
2559 break;
2561 case RAM_SAVE_FLAG_ZERO:
2562 ch = qemu_get_byte(f);
2563 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2564 break;
2566 case RAM_SAVE_FLAG_PAGE:
2567 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2568 break;
2570 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2571 len = qemu_get_be32(f);
2572 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2573 error_report("Invalid compressed data length: %d", len);
2574 ret = -EINVAL;
2575 break;
2577 decompress_data_with_multi_threads(f, host, len);
2578 break;
2580 case RAM_SAVE_FLAG_XBZRLE:
2581 if (load_xbzrle(f, addr, host) < 0) {
2582 error_report("Failed to decompress XBZRLE page at "
2583 RAM_ADDR_FMT, addr);
2584 ret = -EINVAL;
2585 break;
2587 break;
2588 case RAM_SAVE_FLAG_EOS:
2589 /* normal exit */
2590 break;
2591 default:
2592 if (flags & RAM_SAVE_FLAG_HOOK) {
2593 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2594 } else {
2595 error_report("Unknown combination of migration flags: %#x",
2596 flags);
2597 ret = -EINVAL;
2600 if (!ret) {
2601 ret = qemu_file_get_error(f);
2605 wait_for_decompress_done();
2606 rcu_read_unlock();
2607 trace_ram_load_complete(ret, seq_iter);
2608 return ret;
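/*
 * Summary of the precopy load loop above: each record starts with an
 * 8-byte word whose bits below the target page size carry the
 * RAM_SAVE_FLAG_* flags and whose page-aligned part is the offset within
 * the current RAMBlock.  RAM_SAVE_FLAG_MEM_SIZE re-synchronises the
 * RAMBlock list sent by ram_save_setup() (there the address field holds
 * the total RAM size instead); ZERO, PAGE, COMPRESS_PAGE and XBZRLE
 * carry page contents in their respective encodings; EOS ends the section.
 */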
2611 static SaveVMHandlers savevm_ram_handlers = {
2612 .save_live_setup = ram_save_setup,
2613 .save_live_iterate = ram_save_iterate,
2614 .save_live_complete_postcopy = ram_save_complete,
2615 .save_live_complete_precopy = ram_save_complete,
2616 .save_live_pending = ram_save_pending,
2617 .load_state = ram_load,
2618 .cleanup = ram_migration_cleanup,
2621 void ram_mig_init(void)
2623 qemu_mutex_init(&XBZRLE.lock);
2624 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
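/*
 * The "ram" section is registered as a live-migration section with
 * version 4, which is the only version_id that ram_load() accepts.
 */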