migration/ram.c (qemu/ar7.git)
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
30 #include "cpu.h"
31 #include <zlib.h>
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
38 #include "xbzrle.h"
39 #include "migration/migration.h"
40 #include "migration/qemu-file.h"
41 #include "migration/vmstate.h"
42 #include "postcopy-ram.h"
43 #include "exec/address-spaces.h"
44 #include "migration/page_cache.h"
45 #include "qemu/error-report.h"
46 #include "trace.h"
47 #include "exec/ram_addr.h"
48 #include "qemu/rcu_queue.h"
49 #include "migration/colo.h"
51 /***********************************************************/
52 /* ram save/restore */
54 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
55 * worked for pages that were filled with the same char. We switched
56 * it to only search for the zero value. And to avoid confusion with
57 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
60 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
61 #define RAM_SAVE_FLAG_ZERO 0x02
62 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
63 #define RAM_SAVE_FLAG_PAGE 0x08
64 #define RAM_SAVE_FLAG_EOS 0x10
65 #define RAM_SAVE_FLAG_CONTINUE 0x20
66 #define RAM_SAVE_FLAG_XBZRLE 0x40
67 /* 0x80 is reserved in migration.h; start with 0x100 next */
68 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
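/*
 * Worked example of the flag encoding (derived from save_page_header()
 * below): the flags are ORed into the low bits of the be64 page offset,
 * so a normal page at offset 0x2000 of an already-announced block goes
 * out on the wire as
 *     0x2000 | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE == 0x2028
 */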
70 static uint8_t *ZERO_TARGET_PAGE;
72 static inline bool is_zero_range(uint8_t *p, uint64_t size)
74 return buffer_is_zero(p, size);
77 /* struct contains XBZRLE cache and a static page
78 used by the compression */
79 static struct {
80 /* buffer used for XBZRLE encoding */
81 uint8_t *encoded_buf;
82 /* buffer for storing page content */
83 uint8_t *current_buf;
84 /* Cache for XBZRLE, Protected by lock. */
85 PageCache *cache;
86 QemuMutex lock;
87 } XBZRLE;
89 /* buffer used for XBZRLE decoding */
90 static uint8_t *xbzrle_decoded_buf;
92 static void XBZRLE_cache_lock(void)
94 if (migrate_use_xbzrle())
95 qemu_mutex_lock(&XBZRLE.lock);
98 static void XBZRLE_cache_unlock(void)
100 if (migrate_use_xbzrle())
101 qemu_mutex_unlock(&XBZRLE.lock);
105 * xbzrle_cache_resize: resize the xbzrle cache
107 * This function is called from qmp_migrate_set_cache_size in main
108 * thread, possibly while a migration is in progress. A running
109 * migration may be using the cache and might finish during this call,
110 * hence changes to the cache are protected by XBZRLE.lock().
112 * Returns the new_size or negative in case of error.
114 * @new_size: new cache size
116 int64_t xbzrle_cache_resize(int64_t new_size)
118 PageCache *new_cache;
119 int64_t ret;
121 if (new_size < TARGET_PAGE_SIZE) {
122 return -1;
125 XBZRLE_cache_lock();
127 if (XBZRLE.cache != NULL) {
128 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
129 goto out_new_size;
131 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
132 TARGET_PAGE_SIZE);
133 if (!new_cache) {
134 error_report("Error creating cache");
135 ret = -1;
136 goto out;
139 cache_fini(XBZRLE.cache);
140 XBZRLE.cache = new_cache;
143 out_new_size:
144 ret = pow2floor(new_size);
145 out:
146 XBZRLE_cache_unlock();
147 return ret;
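/*
 * Usage sketch (illustrative values): a resize request that is not a
 * power of two is accepted but rounded down, e.g.
 *
 *     int64_t got = xbzrle_cache_resize(70 * 1024 * 1024);
 *     // got == 64 * 1024 * 1024 on success, negative on error
 */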
151 * An outstanding page request, on the source, having been received
152 * and queued
154 struct RAMSrcPageRequest {
155 RAMBlock *rb;
156 hwaddr offset;
157 hwaddr len;
159 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
162 /* State of RAM for migration */
163 struct RAMState {
164 /* QEMUFile used for this migration */
165 QEMUFile *f;
166 /* Last block that we have visited searching for dirty pages */
167 RAMBlock *last_seen_block;
168 /* Last block from where we have sent data */
169 RAMBlock *last_sent_block;
170 /* Last dirty target page we have sent */
171 ram_addr_t last_page;
172 /* last ram version we have seen */
173 uint32_t last_version;
174 /* We are in the first round */
175 bool ram_bulk_stage;
176 /* How many times we have dirty too many pages */
177 int dirty_rate_high_cnt;
178 /* How many times we have synchronized the bitmap */
179 uint64_t bitmap_sync_count;
180 /* these variables are used for bitmap sync */
181 /* last time we did a full bitmap_sync */
182 int64_t time_last_bitmap_sync;
183 /* bytes transferred at start_time */
184 uint64_t bytes_xfer_prev;
185 /* number of dirty pages since start_time */
186 uint64_t num_dirty_pages_period;
187 /* xbzrle misses since the beginning of the period */
188 uint64_t xbzrle_cache_miss_prev;
189 /* number of iterations at the beginning of period */
190 uint64_t iterations_prev;
191 /* Accounting fields */
192 /* number of zero pages. It used to be pages filled by the same char. */
193 uint64_t zero_pages;
194 /* number of normal transferred pages */
195 uint64_t norm_pages;
196 /* Iterations since start */
197 uint64_t iterations;
198 /* xbzrle transmitted bytes. Note that this is the size after
199 * compression, so it can't be derived from the page count */
200 uint64_t xbzrle_bytes;
201 /* xbzrle transmitted pages */
202 uint64_t xbzrle_pages;
203 /* xbzrle number of cache miss */
204 uint64_t xbzrle_cache_miss;
205 /* xbzrle miss rate */
206 double xbzrle_cache_miss_rate;
207 /* xbzrle number of overflows */
208 uint64_t xbzrle_overflows;
209 /* number of dirty bits in the bitmap */
210 uint64_t migration_dirty_pages;
211 /* total number of bytes transferred */
212 uint64_t bytes_transferred;
213 /* number of dirtied pages in the last second */
214 uint64_t dirty_pages_rate;
215 /* Count of requests incoming from destination */
216 uint64_t postcopy_requests;
217 /* protects modification of the bitmap */
218 QemuMutex bitmap_mutex;
219 /* The RAMBlock used in the last src_page_requests */
220 RAMBlock *last_req_rb;
221 /* Queue of outstanding page requests from the destination */
222 QemuMutex src_page_req_mutex;
223 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
225 typedef struct RAMState RAMState;
227 static RAMState ram_state;
229 uint64_t dup_mig_pages_transferred(void)
231 return ram_state.zero_pages;
234 uint64_t norm_mig_pages_transferred(void)
236 return ram_state.norm_pages;
239 uint64_t xbzrle_mig_bytes_transferred(void)
241 return ram_state.xbzrle_bytes;
244 uint64_t xbzrle_mig_pages_transferred(void)
246 return ram_state.xbzrle_pages;
249 uint64_t xbzrle_mig_pages_cache_miss(void)
251 return ram_state.xbzrle_cache_miss;
254 double xbzrle_mig_cache_miss_rate(void)
256 return ram_state.xbzrle_cache_miss_rate;
259 uint64_t xbzrle_mig_pages_overflow(void)
261 return ram_state.xbzrle_overflows;
264 uint64_t ram_bytes_transferred(void)
266 return ram_state.bytes_transferred;
269 uint64_t ram_bytes_remaining(void)
271 return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
274 uint64_t ram_dirty_sync_count(void)
276 return ram_state.bitmap_sync_count;
279 uint64_t ram_dirty_pages_rate(void)
281 return ram_state.dirty_pages_rate;
284 uint64_t ram_postcopy_requests(void)
286 return ram_state.postcopy_requests;
289 /* used by the search for pages to send */
290 struct PageSearchStatus {
291 /* Current block being searched */
292 RAMBlock *block;
293 /* Current page to search from */
294 unsigned long page;
295 /* Set once we wrap around */
296 bool complete_round;
298 typedef struct PageSearchStatus PageSearchStatus;
300 struct CompressParam {
301 bool done;
302 bool quit;
303 QEMUFile *file;
304 QemuMutex mutex;
305 QemuCond cond;
306 RAMBlock *block;
307 ram_addr_t offset;
309 typedef struct CompressParam CompressParam;
311 struct DecompressParam {
312 bool done;
313 bool quit;
314 QemuMutex mutex;
315 QemuCond cond;
316 void *des;
317 uint8_t *compbuf;
318 int len;
320 typedef struct DecompressParam DecompressParam;
322 static CompressParam *comp_param;
323 static QemuThread *compress_threads;
324 /* comp_done_cond is used to wake up the migration thread when
325 * one of the compression threads has finished the compression.
326 * comp_done_lock is used to co-work with comp_done_cond.
328 static QemuMutex comp_done_lock;
329 static QemuCond comp_done_cond;
330 /* The empty QEMUFileOps will be used by file in CompressParam */
331 static const QEMUFileOps empty_ops = { };
333 static DecompressParam *decomp_param;
334 static QemuThread *decompress_threads;
335 static QemuMutex decomp_done_lock;
336 static QemuCond decomp_done_cond;
338 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
339 ram_addr_t offset);
341 static void *do_data_compress(void *opaque)
343 CompressParam *param = opaque;
344 RAMBlock *block;
345 ram_addr_t offset;
347 qemu_mutex_lock(&param->mutex);
348 while (!param->quit) {
349 if (param->block) {
350 block = param->block;
351 offset = param->offset;
352 param->block = NULL;
353 qemu_mutex_unlock(&param->mutex);
355 do_compress_ram_page(param->file, block, offset);
357 qemu_mutex_lock(&comp_done_lock);
358 param->done = true;
359 qemu_cond_signal(&comp_done_cond);
360 qemu_mutex_unlock(&comp_done_lock);
362 qemu_mutex_lock(&param->mutex);
363 } else {
364 qemu_cond_wait(&param->cond, &param->mutex);
367 qemu_mutex_unlock(&param->mutex);
369 return NULL;
372 static inline void terminate_compression_threads(void)
374 int idx, thread_count;
376 thread_count = migrate_compress_threads();
378 for (idx = 0; idx < thread_count; idx++) {
379 qemu_mutex_lock(&comp_param[idx].mutex);
380 comp_param[idx].quit = true;
381 qemu_cond_signal(&comp_param[idx].cond);
382 qemu_mutex_unlock(&comp_param[idx].mutex);
386 void migrate_compress_threads_join(void)
388 int i, thread_count;
390 if (!migrate_use_compression()) {
391 return;
393 terminate_compression_threads();
394 thread_count = migrate_compress_threads();
395 for (i = 0; i < thread_count; i++) {
396 qemu_thread_join(compress_threads + i);
397 qemu_fclose(comp_param[i].file);
398 qemu_mutex_destroy(&comp_param[i].mutex);
399 qemu_cond_destroy(&comp_param[i].cond);
401 qemu_mutex_destroy(&comp_done_lock);
402 qemu_cond_destroy(&comp_done_cond);
403 g_free(compress_threads);
404 g_free(comp_param);
405 compress_threads = NULL;
406 comp_param = NULL;
409 void migrate_compress_threads_create(void)
411 int i, thread_count;
413 if (!migrate_use_compression()) {
414 return;
416 thread_count = migrate_compress_threads();
417 compress_threads = g_new0(QemuThread, thread_count);
418 comp_param = g_new0(CompressParam, thread_count);
419 qemu_cond_init(&comp_done_cond);
420 qemu_mutex_init(&comp_done_lock);
421 for (i = 0; i < thread_count; i++) {
422 /* comp_param[i].file is just used as a dummy buffer to save data,
423 * set its ops to empty.
425 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
426 comp_param[i].done = true;
427 comp_param[i].quit = false;
428 qemu_mutex_init(&comp_param[i].mutex);
429 qemu_cond_init(&comp_param[i].cond);
430 qemu_thread_create(compress_threads + i, "compress",
431 do_data_compress, comp_param + i,
432 QEMU_THREAD_JOINABLE);
437 * save_page_header: write page header to wire
439 * If this is the 1st block, it also writes the block identification
441 * Returns the number of bytes written
443 * @f: QEMUFile where to send the data
444 * @block: block that contains the page we want to send
445 * @offset: offset inside the block for the page
446 * in the lower bits, it contains flags
448 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
449 ram_addr_t offset)
451 size_t size, len;
453 if (block == rs->last_sent_block) {
454 offset |= RAM_SAVE_FLAG_CONTINUE;
456 qemu_put_be64(f, offset);
457 size = 8;
459 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
460 len = strlen(block->idstr);
461 qemu_put_byte(f, len);
462 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
463 size += 1 + len;
464 rs->last_sent_block = block;
466 return size;
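/*
 * Resulting wire layout of a page header (as written above):
 *
 *     be64   offset | flags                      always (8 bytes)
 *     u8     strlen(block->idstr)                only without CONTINUE
 *     bytes  block->idstr (len bytes, no NUL)    only without CONTINUE
 *
 * hence the returned size is either 8 or 8 + 1 + len.
 */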
470 * mig_throttle_guest_down: throttle down the guest
472 * Reduce amount of guest cpu execution to hopefully slow down memory
473 * writes. If guest dirty memory rate is reduced below the rate at
474 * which we can transfer pages to the destination then we should be
475 * able to complete migration. Some workloads dirty memory way too
476 * fast and will not effectively converge, even with auto-converge.
478 static void mig_throttle_guest_down(void)
480 MigrationState *s = migrate_get_current();
481 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
482 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
484 /* We have not started throttling yet. Let's start it. */
485 if (!cpu_throttle_active()) {
486 cpu_throttle_set(pct_initial);
487 } else {
488 /* Throttling already on, just increase the rate */
489 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
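/*
 * Worked example: with, say, cpu_throttle_initial=20 and
 * cpu_throttle_increment=10, successive calls throttle the vCPUs at
 * 20%, 30%, 40%, ... until the dirty rate falls below the transfer rate.
 */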
494 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
496 * @rs: current RAM state
497 * @current_addr: address for the zero page
499 * Update the xbzrle cache to reflect a page that's been sent as all 0.
500 * The important thing is that a stale (not-yet-0'd) page be replaced
501 * by the new data.
502 * As a bonus, if the page wasn't in the cache it gets added so that
503 * when a small write is made into the 0'd page it gets XBZRLE sent.
505 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
507 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
508 return;
511 /* We don't care if this fails to allocate a new cache page
512 * as long as it updated an old one */
513 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
514 rs->bitmap_sync_count);
517 #define ENCODING_FLAG_XBZRLE 0x1
520 * save_xbzrle_page: compress and send current page
522 * Returns: 1 means that we wrote the page
523 * 0 means that page is identical to the one already sent
524 * -1 means that xbzrle would be longer than normal
526 * @rs: current RAM state
527 * @current_data: pointer to the address of the page contents
528 * @current_addr: addr of the page
529 * @block: block that contains the page we want to send
530 * @offset: offset inside the block for the page
531 * @last_stage: if we are at the completion stage
533 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
534 ram_addr_t current_addr, RAMBlock *block,
535 ram_addr_t offset, bool last_stage)
537 int encoded_len = 0, bytes_xbzrle;
538 uint8_t *prev_cached_page;
540 if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
541 rs->xbzrle_cache_miss++;
542 if (!last_stage) {
543 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
544 rs->bitmap_sync_count) == -1) {
545 return -1;
546 } else {
547 /* update *current_data when the page has been
548 inserted into cache */
549 *current_data = get_cached_data(XBZRLE.cache, current_addr);
552 return -1;
555 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
557 /* save current buffer into memory */
558 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
560 /* XBZRLE encoding (if there is no overflow) */
561 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
562 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
563 TARGET_PAGE_SIZE);
564 if (encoded_len == 0) {
565 trace_save_xbzrle_page_skipping();
566 return 0;
567 } else if (encoded_len == -1) {
568 trace_save_xbzrle_page_overflow();
569 rs->xbzrle_overflows++;
570 /* update data in the cache */
571 if (!last_stage) {
572 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
573 *current_data = prev_cached_page;
575 return -1;
578 /* we need to update the data in the cache, in order to get the same data */
579 if (!last_stage) {
580 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
583 /* Send XBZRLE based compressed page */
584 bytes_xbzrle = save_page_header(rs, rs->f, block,
585 offset | RAM_SAVE_FLAG_XBZRLE);
586 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
587 qemu_put_be16(rs->f, encoded_len);
588 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
589 bytes_xbzrle += encoded_len + 1 + 2;
590 rs->xbzrle_pages++;
591 rs->xbzrle_bytes += bytes_xbzrle;
592 rs->bytes_transferred += bytes_xbzrle;
594 return 1;
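/*
 * On-wire form of an XBZRLE page (matching the code above and the
 * parsing in load_xbzrle() further down):
 *
 *     save_page_header(offset | RAM_SAVE_FLAG_XBZRLE)
 *     u8    ENCODING_FLAG_XBZRLE
 *     be16  encoded_len
 *     bytes encoded_buf[encoded_len]
 *
 * which is where the "+ encoded_len + 1 + 2" accounting comes from.
 */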
598 * migration_bitmap_find_dirty: find the next dirty page from start
600 * Called with rcu_read_lock() to protect migration_bitmap
602 * Returns the byte offset within memory region of the start of a dirty page
604 * @rs: current RAM state
605 * @rb: RAMBlock where to search for dirty pages
606 * @start: page where we start the search
608 static inline
609 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
610 unsigned long start)
612 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
613 unsigned long *bitmap = rb->bmap;
614 unsigned long next;
616 if (rs->ram_bulk_stage && start > 0) {
617 next = start + 1;
618 } else {
619 next = find_next_bit(bitmap, size, start);
622 return next;
625 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
626 RAMBlock *rb,
627 unsigned long page)
629 bool ret;
631 ret = test_and_clear_bit(page, rb->bmap);
633 if (ret) {
634 rs->migration_dirty_pages--;
636 return ret;
639 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
640 ram_addr_t start, ram_addr_t length)
642 rs->migration_dirty_pages +=
643 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
644 &rs->num_dirty_pages_period);
648 * ram_pagesize_summary: calculate all the pagesizes of a VM
650 * Returns a summary bitmap of the page sizes of all RAMBlocks
652 * For VMs with just normal pages this is equivalent to the host page
653 * size. If it's got some huge pages then it's the OR of all the
654 * different page sizes.
656 uint64_t ram_pagesize_summary(void)
658 RAMBlock *block;
659 uint64_t summary = 0;
661 RAMBLOCK_FOREACH(block) {
662 summary |= block->page_size;
665 return summary;
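/*
 * Example (assuming 4 KiB normal pages and one 2 MiB hugepage-backed
 * block): the summary is 0x1000 | 0x200000 == 0x201000.
 */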
668 static void migration_bitmap_sync(RAMState *rs)
670 RAMBlock *block;
671 int64_t end_time;
672 uint64_t bytes_xfer_now;
674 rs->bitmap_sync_count++;
676 if (!rs->bytes_xfer_prev) {
677 rs->bytes_xfer_prev = ram_bytes_transferred();
680 if (!rs->time_last_bitmap_sync) {
681 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
684 trace_migration_bitmap_sync_start();
685 memory_global_dirty_log_sync();
687 qemu_mutex_lock(&rs->bitmap_mutex);
688 rcu_read_lock();
689 RAMBLOCK_FOREACH(block) {
690 migration_bitmap_sync_range(rs, block, 0, block->used_length);
692 rcu_read_unlock();
693 qemu_mutex_unlock(&rs->bitmap_mutex);
695 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
697 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
699 /* more than 1 second = 1000 milliseconds */
700 if (end_time > rs->time_last_bitmap_sync + 1000) {
701 if (migrate_auto_converge()) {
702 /* The following detection logic can be refined later. For now:
703 Check to see if the dirtied bytes are 50% more than the approx.
704 amount of bytes that just got transferred since the last time we
705 were in this routine. If that happens twice, start or increase
706 throttling */
707 bytes_xfer_now = ram_bytes_transferred();
709 if (rs->dirty_pages_rate &&
710 (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
711 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
712 (rs->dirty_rate_high_cnt++ >= 2)) {
713 trace_migration_throttle();
714 rs->dirty_rate_high_cnt = 0;
715 mig_throttle_guest_down();
717 rs->bytes_xfer_prev = bytes_xfer_now;
720 if (migrate_use_xbzrle()) {
721 if (rs->iterations_prev != rs->iterations) {
722 rs->xbzrle_cache_miss_rate =
723 (double)(rs->xbzrle_cache_miss -
724 rs->xbzrle_cache_miss_prev) /
725 (rs->iterations - rs->iterations_prev);
727 rs->iterations_prev = rs->iterations;
728 rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
730 rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000
731 / (end_time - rs->time_last_bitmap_sync);
732 rs->time_last_bitmap_sync = end_time;
733 rs->num_dirty_pages_period = 0;
735 if (migrate_use_events()) {
736 qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
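/*
 * Worked example of the auto-converge check above (assuming a 4 KiB
 * target page): dirtying 60000 pages (~234 MiB) in a second during which
 * only ~100 MiB were transferred satisfies "dirty bytes > transferred/2";
 * once that has happened on three such checks (not necessarily
 * consecutive seconds), mig_throttle_guest_down() is invoked.
 */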
741 * save_zero_page: send the zero page to the stream
743 * Returns the number of pages written.
745 * @rs: current RAM state
746 * @block: block that contains the page we want to send
747 * @offset: offset inside the block for the page
748 * @p: pointer to the page
750 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
751 uint8_t *p)
753 int pages = -1;
755 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
756 rs->zero_pages++;
757 rs->bytes_transferred +=
758 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
759 qemu_put_byte(rs->f, 0);
760 rs->bytes_transferred += 1;
761 pages = 1;
764 return pages;
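/*
 * So a zero page costs just the page header with RAM_SAVE_FLAG_ZERO set
 * plus a single literal 0 byte on the wire, e.g. 9 bytes for a page of a
 * block whose name has already been sent.
 */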
767 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
769 if (!migrate_release_ram() || !migration_in_postcopy()) {
770 return;
773 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
777 * ram_save_page: send the given page to the stream
779 * Returns the number of pages written.
780 * < 0 - error
781 * >=0 - Number of pages written - this might legally be 0
782 * if xbzrle noticed the page was the same.
784 * @rs: current RAM state
785 * @block: block that contains the page we want to send
786 * @offset: offset inside the block for the page
787 * @last_stage: if we are at the completion stage
789 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
791 int pages = -1;
792 uint64_t bytes_xmit;
793 ram_addr_t current_addr;
794 uint8_t *p;
795 int ret;
796 bool send_async = true;
797 RAMBlock *block = pss->block;
798 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
800 p = block->host + offset;
801 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
803 /* When in doubt, send the page as a normal page */
804 bytes_xmit = 0;
805 ret = ram_control_save_page(rs->f, block->offset,
806 offset, TARGET_PAGE_SIZE, &bytes_xmit);
807 if (bytes_xmit) {
808 rs->bytes_transferred += bytes_xmit;
809 pages = 1;
812 XBZRLE_cache_lock();
814 current_addr = block->offset + offset;
816 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
817 if (ret != RAM_SAVE_CONTROL_DELAYED) {
818 if (bytes_xmit > 0) {
819 rs->norm_pages++;
820 } else if (bytes_xmit == 0) {
821 rs->zero_pages++;
824 } else {
825 pages = save_zero_page(rs, block, offset, p);
826 if (pages > 0) {
827 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
828 * page would be stale
830 xbzrle_cache_zero_page(rs, current_addr);
831 ram_release_pages(block->idstr, offset, pages);
832 } else if (!rs->ram_bulk_stage &&
833 !migration_in_postcopy() && migrate_use_xbzrle()) {
834 pages = save_xbzrle_page(rs, &p, current_addr, block,
835 offset, last_stage);
836 if (!last_stage) {
837 /* Can't send this cached data async, since the cache page
838 * might get updated before it gets to the wire
840 send_async = false;
845 /* XBZRLE overflow or normal page */
846 if (pages == -1) {
847 rs->bytes_transferred += save_page_header(rs, rs->f, block,
848 offset | RAM_SAVE_FLAG_PAGE);
849 if (send_async) {
850 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
851 migrate_release_ram() &
852 migration_in_postcopy());
853 } else {
854 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
856 rs->bytes_transferred += TARGET_PAGE_SIZE;
857 pages = 1;
858 rs->norm_pages++;
861 XBZRLE_cache_unlock();
863 return pages;
866 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
867 ram_addr_t offset)
869 RAMState *rs = &ram_state;
870 int bytes_sent, blen;
871 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
873 bytes_sent = save_page_header(rs, f, block, offset |
874 RAM_SAVE_FLAG_COMPRESS_PAGE);
875 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
876 migrate_compress_level());
877 if (blen < 0) {
878 bytes_sent = 0;
879 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
880 error_report("compressed data failed!");
881 } else {
882 bytes_sent += blen;
883 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
886 return bytes_sent;
889 static void flush_compressed_data(RAMState *rs)
891 int idx, len, thread_count;
893 if (!migrate_use_compression()) {
894 return;
896 thread_count = migrate_compress_threads();
898 qemu_mutex_lock(&comp_done_lock);
899 for (idx = 0; idx < thread_count; idx++) {
900 while (!comp_param[idx].done) {
901 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
904 qemu_mutex_unlock(&comp_done_lock);
906 for (idx = 0; idx < thread_count; idx++) {
907 qemu_mutex_lock(&comp_param[idx].mutex);
908 if (!comp_param[idx].quit) {
909 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
910 rs->bytes_transferred += len;
912 qemu_mutex_unlock(&comp_param[idx].mutex);
916 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
917 ram_addr_t offset)
919 param->block = block;
920 param->offset = offset;
923 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
924 ram_addr_t offset)
926 int idx, thread_count, bytes_xmit = -1, pages = -1;
928 thread_count = migrate_compress_threads();
929 qemu_mutex_lock(&comp_done_lock);
930 while (true) {
931 for (idx = 0; idx < thread_count; idx++) {
932 if (comp_param[idx].done) {
933 comp_param[idx].done = false;
934 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
935 qemu_mutex_lock(&comp_param[idx].mutex);
936 set_compress_params(&comp_param[idx], block, offset);
937 qemu_cond_signal(&comp_param[idx].cond);
938 qemu_mutex_unlock(&comp_param[idx].mutex);
939 pages = 1;
940 rs->norm_pages++;
941 rs->bytes_transferred += bytes_xmit;
942 break;
945 if (pages > 0) {
946 break;
947 } else {
948 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
951 qemu_mutex_unlock(&comp_done_lock);
953 return pages;
957 * ram_save_compressed_page: compress the given page and send it to the stream
959 * Returns the number of pages written.
961 * @rs: current RAM state
962 * @block: block that contains the page we want to send
963 * @offset: offset inside the block for the page
964 * @last_stage: if we are at the completion stage
966 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
967 bool last_stage)
969 int pages = -1;
970 uint64_t bytes_xmit = 0;
971 uint8_t *p;
972 int ret, blen;
973 RAMBlock *block = pss->block;
974 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
976 p = block->host + offset;
978 ret = ram_control_save_page(rs->f, block->offset,
979 offset, TARGET_PAGE_SIZE, &bytes_xmit);
980 if (bytes_xmit) {
981 rs->bytes_transferred += bytes_xmit;
982 pages = 1;
984 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
985 if (ret != RAM_SAVE_CONTROL_DELAYED) {
986 if (bytes_xmit > 0) {
987 rs->norm_pages++;
988 } else if (bytes_xmit == 0) {
989 rs->zero_pages++;
992 } else {
993 /* When starting the process of a new block, the first page of
994 * the block should be sent out before other pages in the same
995 * block, and all the pages in last block should have been sent
996 * out, keeping this order is important, because the 'cont' flag
997 * is used to avoid resending the block name.
999 if (block != rs->last_sent_block) {
1000 flush_compressed_data(rs);
1001 pages = save_zero_page(rs, block, offset, p);
1002 if (pages == -1) {
1003 /* Make sure the first page is sent out before other pages */
1004 bytes_xmit = save_page_header(rs, rs->f, block, offset |
1005 RAM_SAVE_FLAG_COMPRESS_PAGE);
1006 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
1007 migrate_compress_level());
1008 if (blen > 0) {
1009 rs->bytes_transferred += bytes_xmit + blen;
1010 rs->norm_pages++;
1011 pages = 1;
1012 } else {
1013 qemu_file_set_error(rs->f, blen);
1014 error_report("compressed data failed!");
1017 if (pages > 0) {
1018 ram_release_pages(block->idstr, offset, pages);
1020 } else {
1021 pages = save_zero_page(rs, block, offset, p);
1022 if (pages == -1) {
1023 pages = compress_page_with_multi_thread(rs, block, offset);
1024 } else {
1025 ram_release_pages(block->idstr, offset, pages);
1030 return pages;
1034 * find_dirty_block: find the next dirty page and update any state
1035 * associated with the search process.
1037 * Returns if a page is found
1039 * @rs: current RAM state
1040 * @pss: data about the state of the current dirty page scan
1041 * @again: set to false if the search has scanned the whole of RAM
1043 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1045 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1046 if (pss->complete_round && pss->block == rs->last_seen_block &&
1047 pss->page >= rs->last_page) {
1049 * We've been once around the RAM and haven't found anything.
1050 * Give up.
1052 *again = false;
1053 return false;
1055 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1056 /* Didn't find anything in this RAM Block */
1057 pss->page = 0;
1058 pss->block = QLIST_NEXT_RCU(pss->block, next);
1059 if (!pss->block) {
1060 /* Hit the end of the list */
1061 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1062 /* Flag that we've looped */
1063 pss->complete_round = true;
1064 rs->ram_bulk_stage = false;
1065 if (migrate_use_xbzrle()) {
1066 /* If xbzrle is on, stop using the data compression at this
1067 * point. In theory, xbzrle can do better than compression.
1069 flush_compressed_data(rs);
1072 /* Didn't find anything this time, but try again on the new block */
1073 *again = true;
1074 return false;
1075 } else {
1076 /* Can go around again, but... */
1077 *again = true;
1078 /* We've found something so probably don't need to */
1079 return true;
1084 * unqueue_page: gets a page of the queue
1086 * Helper for 'get_queued_page' - gets a page off the queue
1088 * Returns the block of the page (or NULL if none available)
1090 * @rs: current RAM state
1091 * @offset: used to return the offset within the RAMBlock
1093 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1095 RAMBlock *block = NULL;
1097 qemu_mutex_lock(&rs->src_page_req_mutex);
1098 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1099 struct RAMSrcPageRequest *entry =
1100 QSIMPLEQ_FIRST(&rs->src_page_requests);
1101 block = entry->rb;
1102 *offset = entry->offset;
1104 if (entry->len > TARGET_PAGE_SIZE) {
1105 entry->len -= TARGET_PAGE_SIZE;
1106 entry->offset += TARGET_PAGE_SIZE;
1107 } else {
1108 memory_region_unref(block->mr);
1109 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1110 g_free(entry);
1113 qemu_mutex_unlock(&rs->src_page_req_mutex);
1115 return block;
1119 * get_queued_page: unqueue a page from the postcopy requests
1121 * Skips pages that are already sent (!dirty)
1123 * Returns if a queued page is found
1125 * @rs: current RAM state
1126 * @pss: data about the state of the current dirty page scan
1128 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1130 RAMBlock *block;
1131 ram_addr_t offset;
1132 bool dirty;
1134 do {
1135 block = unqueue_page(rs, &offset);
1137 * We're sending this page, and since it's postcopy nothing else
1138 * will dirty it, and we must make sure it doesn't get sent again
1139 * even if this queue request was received after the background
1140 * search already sent it.
1142 if (block) {
1143 unsigned long page;
1145 page = offset >> TARGET_PAGE_BITS;
1146 dirty = test_bit(page, block->bmap);
1147 if (!dirty) {
1148 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1149 page, test_bit(page, block->unsentmap));
1150 } else {
1151 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1155 } while (block && !dirty);
1157 if (block) {
1159 * As soon as we start servicing pages out of order, then we have
1160 * to kill the bulk stage, since the bulk stage assumes
1161 * in (migration_bitmap_find_and_reset_dirty) that every page is
1162 * dirty, that's no longer true.
1164 rs->ram_bulk_stage = false;
1167 * We want the background search to continue from the queued page
1168 * since the guest is likely to want other pages near to the page
1169 * it just requested.
1171 pss->block = block;
1172 pss->page = offset >> TARGET_PAGE_BITS;
1175 return !!block;
1179 * migration_page_queue_free: drop any remaining pages in the ram
1180 * request queue
1182 * It should be empty at the end anyway, but in error cases there may
1183 * be some left; if any pages remain, we drop them.
1186 void migration_page_queue_free(void)
1188 struct RAMSrcPageRequest *mspr, *next_mspr;
1189 RAMState *rs = &ram_state;
1190 /* This queue should generally be empty - but in the case of a failed
1191 * migration it might have some leftover entries.
1193 rcu_read_lock();
1194 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1195 memory_region_unref(mspr->rb->mr);
1196 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1197 g_free(mspr);
1199 rcu_read_unlock();
1203 * ram_save_queue_pages: queue the page for transmission
1205 * A request from postcopy destination for example.
1207 * Returns zero on success or negative on error
1209 * @rbname: Name of the RAMBlock of the request. NULL means the
1210 * same as the last one.
1211 * @start: starting address from the start of the RAMBlock
1212 * @len: length (in bytes) to send
1214 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1216 RAMBlock *ramblock;
1217 RAMState *rs = &ram_state;
1219 rs->postcopy_requests++;
1220 rcu_read_lock();
1221 if (!rbname) {
1222 /* Reuse last RAMBlock */
1223 ramblock = rs->last_req_rb;
1225 if (!ramblock) {
1227 * Shouldn't happen, we can't reuse the last RAMBlock if
1228 * it's the 1st request.
1230 error_report("ram_save_queue_pages no previous block");
1231 goto err;
1233 } else {
1234 ramblock = qemu_ram_block_by_name(rbname);
1236 if (!ramblock) {
1237 /* We shouldn't be asked for a non-existent RAMBlock */
1238 error_report("ram_save_queue_pages no block '%s'", rbname);
1239 goto err;
1241 rs->last_req_rb = ramblock;
1243 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1244 if (start+len > ramblock->used_length) {
1245 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1246 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1247 __func__, start, len, ramblock->used_length);
1248 goto err;
1251 struct RAMSrcPageRequest *new_entry =
1252 g_malloc0(sizeof(struct RAMSrcPageRequest));
1253 new_entry->rb = ramblock;
1254 new_entry->offset = start;
1255 new_entry->len = len;
1257 memory_region_ref(ramblock->mr);
1258 qemu_mutex_lock(&rs->src_page_req_mutex);
1259 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1260 qemu_mutex_unlock(&rs->src_page_req_mutex);
1261 rcu_read_unlock();
1263 return 0;
1265 err:
1266 rcu_read_unlock();
1267 return -1;
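/*
 * Usage sketch (hypothetical values): a postcopy page fault on the
 * destination for block "pc.ram" ends up here roughly as
 *
 *     ram_save_queue_pages("pc.ram", 0x7f0000, TARGET_PAGE_SIZE);
 *
 * which takes a reference on the block's MemoryRegion and appends a
 * RAMSrcPageRequest that unqueue_page() later pops.
 */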
1271 * ram_save_target_page: save one target page
1273 * Returns the number of pages written
1275 * @rs: current RAM state
1276 * @ms: current migration state
1277 * @pss: data about the page we want to send
1278 * @last_stage: if we are at the completion stage
1280 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1281 bool last_stage)
1283 int res = 0;
1285 /* Check if the page is dirty and if so send it */
1286 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1288 * If xbzrle is on, stop using the data compression after first
1289 * round of migration even if compression is enabled. In theory,
1290 * xbzrle can do better than compression.
1292 if (migrate_use_compression() &&
1293 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1294 res = ram_save_compressed_page(rs, pss, last_stage);
1295 } else {
1296 res = ram_save_page(rs, pss, last_stage);
1299 if (res < 0) {
1300 return res;
1302 if (pss->block->unsentmap) {
1303 clear_bit(pss->page, pss->block->unsentmap);
1307 return res;
1311 * ram_save_host_page: save a whole host page
1313 * Starting at *offset send pages up to the end of the current host
1314 * page. It's valid for the initial offset to point into the middle of
1315 * a host page in which case the remainder of the hostpage is sent.
1316 * Only dirty target pages are sent. Note that the host page size may
1317 * be a huge page for this block.
1318 * The saving stops at the boundary of the used_length of the block
1319 * if the RAMBlock isn't a multiple of the host page size.
1321 * Returns the number of pages written or negative on error
1323 * @rs: current RAM state
1324 * @ms: current migration state
1325 * @pss: data about the page we want to send
1326 * @last_stage: if we are at the completion stage
1328 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1329 bool last_stage)
1331 int tmppages, pages = 0;
1332 size_t pagesize_bits =
1333 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1335 do {
1336 tmppages = ram_save_target_page(rs, pss, last_stage);
1337 if (tmppages < 0) {
1338 return tmppages;
1341 pages += tmppages;
1342 pss->page++;
1343 } while ((pss->page & (pagesize_bits - 1)) &&
1344 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
1346 /* The offset we leave with is the last one we looked at */
1347 pss->page--;
1348 return pages;
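/*
 * Worked example: for a block backed by 2 MiB hugepages with a 4 KiB
 * target page (assumed sizes), pagesize_bits == 512, so a single call
 * sends up to 512 consecutive dirty target pages and leaves pss->page
 * on the last page it looked at.
 */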
1352 * ram_find_and_save_block: finds a dirty page and sends it to f
1354 * Called within an RCU critical section.
1356 * Returns the number of pages written where zero means no dirty pages
1358 * @rs: current RAM state
1359 * @last_stage: if we are at the completion stage
1361 * On systems where host-page-size > target-page-size it will send all the
1362 * pages in a host page that are dirty.
1365 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1367 PageSearchStatus pss;
1368 int pages = 0;
1369 bool again, found;
1371 /* No dirty page as there is zero RAM */
1372 if (!ram_bytes_total()) {
1373 return pages;
1376 pss.block = rs->last_seen_block;
1377 pss.page = rs->last_page;
1378 pss.complete_round = false;
1380 if (!pss.block) {
1381 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1384 do {
1385 again = true;
1386 found = get_queued_page(rs, &pss);
1388 if (!found) {
1389 /* priority queue empty, so just search for something dirty */
1390 found = find_dirty_block(rs, &pss, &again);
1393 if (found) {
1394 pages = ram_save_host_page(rs, &pss, last_stage);
1396 } while (!pages && again);
1398 rs->last_seen_block = pss.block;
1399 rs->last_page = pss.page;
1401 return pages;
1404 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1406 uint64_t pages = size / TARGET_PAGE_SIZE;
1407 RAMState *rs = &ram_state;
1409 if (zero) {
1410 rs->zero_pages += pages;
1411 } else {
1412 rs->norm_pages += pages;
1413 rs->bytes_transferred += size;
1414 qemu_update_position(f, size);
1418 uint64_t ram_bytes_total(void)
1420 RAMBlock *block;
1421 uint64_t total = 0;
1423 rcu_read_lock();
1424 RAMBLOCK_FOREACH(block) {
1425 total += block->used_length;
1427 rcu_read_unlock();
1428 return total;
1431 void free_xbzrle_decoded_buf(void)
1433 g_free(xbzrle_decoded_buf);
1434 xbzrle_decoded_buf = NULL;
1437 static void ram_migration_cleanup(void *opaque)
1439 RAMBlock *block;
1441 /* the caller must hold the iothread lock or be in a bh, so there is
1442 * no writing race against this migration_bitmap
1444 memory_global_dirty_log_stop();
1446 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1447 g_free(block->bmap);
1448 block->bmap = NULL;
1449 g_free(block->unsentmap);
1450 block->unsentmap = NULL;
1453 XBZRLE_cache_lock();
1454 if (XBZRLE.cache) {
1455 cache_fini(XBZRLE.cache);
1456 g_free(XBZRLE.encoded_buf);
1457 g_free(XBZRLE.current_buf);
1458 g_free(ZERO_TARGET_PAGE);
1459 XBZRLE.cache = NULL;
1460 XBZRLE.encoded_buf = NULL;
1461 XBZRLE.current_buf = NULL;
1463 XBZRLE_cache_unlock();
1466 static void ram_state_reset(RAMState *rs)
1468 rs->last_seen_block = NULL;
1469 rs->last_sent_block = NULL;
1470 rs->last_page = 0;
1471 rs->last_version = ram_list.version;
1472 rs->ram_bulk_stage = true;
1475 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1478 * 'expected' is the value you expect the bitmap mostly to be full
1479 * of; it won't bother printing lines that are all this value.
1480 * If 'todump' is null the migration bitmap is dumped.
1482 void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1483 unsigned long pages)
1485 int64_t cur;
1486 int64_t linelen = 128;
1487 char linebuf[129];
1489 for (cur = 0; cur < pages; cur += linelen) {
1490 int64_t curb;
1491 bool found = false;
1493 * Last line; catch the case where the line length
1494 * is longer than remaining ram
1496 if (cur + linelen > pages) {
1497 linelen = pages - cur;
1499 for (curb = 0; curb < linelen; curb++) {
1500 bool thisbit = test_bit(cur + curb, todump);
1501 linebuf[curb] = thisbit ? '1' : '.';
1502 found = found || (thisbit != expected);
1504 if (found) {
1505 linebuf[curb] = '\0';
1506 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1511 /* **** functions for postcopy ***** */
1513 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1515 struct RAMBlock *block;
1517 RAMBLOCK_FOREACH(block) {
1518 unsigned long *bitmap = block->bmap;
1519 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1520 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
1522 while (run_start < range) {
1523 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1524 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
1525 (run_end - run_start) << TARGET_PAGE_BITS);
1526 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1532 * postcopy_send_discard_bm_ram: discard a RAMBlock
1534 * Returns zero on success
1536 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1537 * Note: At this point the 'unsentmap' is the processed bitmap combined
1538 * with the dirtymap; so a '1' means it's either dirty or unsent.
1540 * @ms: current migration state
1541 * @pds: state for postcopy
1542 * @start: RAMBlock starting page
1543 * @length: RAMBlock size
1545 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1546 PostcopyDiscardState *pds,
1547 RAMBlock *block)
1549 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
1550 unsigned long current;
1551 unsigned long *unsentmap = block->unsentmap;
1553 for (current = 0; current < end; ) {
1554 unsigned long one = find_next_bit(unsentmap, end, current);
1556 if (one <= end) {
1557 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1558 unsigned long discard_length;
1560 if (zero >= end) {
1561 discard_length = end - one;
1562 } else {
1563 discard_length = zero - one;
1565 if (discard_length) {
1566 postcopy_discard_send_range(ms, pds, one, discard_length);
1568 current = one + discard_length;
1569 } else {
1570 current = one;
1574 return 0;
1578 * postcopy_each_ram_send_discard: discard all RAMBlocks
1580 * Returns 0 for success or negative for error
1582 * Utility for the outgoing postcopy code.
1583 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1584 * passing it bitmap indexes and name.
1585 * (qemu_ram_foreach_block ends up passing unscaled lengths
1586 * which would mean postcopy code would have to deal with target page)
1588 * @ms: current migration state
1590 static int postcopy_each_ram_send_discard(MigrationState *ms)
1592 struct RAMBlock *block;
1593 int ret;
1595 RAMBLOCK_FOREACH(block) {
1596 PostcopyDiscardState *pds =
1597 postcopy_discard_send_init(ms, block->idstr);
1600 * Postcopy sends chunks of bitmap over the wire, but it
1601 * just needs indexes at this point, avoids it having
1602 * target page specific code.
1604 ret = postcopy_send_discard_bm_ram(ms, pds, block);
1605 postcopy_discard_send_finish(ms, pds);
1606 if (ret) {
1607 return ret;
1611 return 0;
1615 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1617 * Helper for postcopy_chunk_hostpages; it's called twice to
1618 * canonicalize the two bitmaps, that are similar, but one is
1619 * inverted.
1621 * Postcopy requires that all target pages in a hostpage are dirty or
1622 * clean, not a mix. This function canonicalizes the bitmaps.
1624 * @ms: current migration state
1625 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1626 * otherwise we need to canonicalize partially dirty host pages
1627 * @block: block that contains the page we want to canonicalize
1628 * @pds: state for postcopy
1630 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1631 RAMBlock *block,
1632 PostcopyDiscardState *pds)
1634 RAMState *rs = &ram_state;
1635 unsigned long *bitmap = block->bmap;
1636 unsigned long *unsentmap = block->unsentmap;
1637 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
1638 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1639 unsigned long run_start;
1641 if (block->page_size == TARGET_PAGE_SIZE) {
1642 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1643 return;
1646 if (unsent_pass) {
1647 /* Find a sent page */
1648 run_start = find_next_zero_bit(unsentmap, pages, 0);
1649 } else {
1650 /* Find a dirty page */
1651 run_start = find_next_bit(bitmap, pages, 0);
1654 while (run_start < pages) {
1655 bool do_fixup = false;
1656 unsigned long fixup_start_addr;
1657 unsigned long host_offset;
1660 * If the start of this run of pages is in the middle of a host
1661 * page, then we need to fixup this host page.
1663 host_offset = run_start % host_ratio;
1664 if (host_offset) {
1665 do_fixup = true;
1666 run_start -= host_offset;
1667 fixup_start_addr = run_start;
1668 /* For the next pass */
1669 run_start = run_start + host_ratio;
1670 } else {
1671 /* Find the end of this run */
1672 unsigned long run_end;
1673 if (unsent_pass) {
1674 run_end = find_next_bit(unsentmap, pages, run_start + 1);
1675 } else {
1676 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
1679 * If the end isn't at the start of a host page, then the
1680 * run doesn't finish at the end of a host page
1681 * and we need to discard.
1683 host_offset = run_end % host_ratio;
1684 if (host_offset) {
1685 do_fixup = true;
1686 fixup_start_addr = run_end - host_offset;
1688 * This host page has gone, the next loop iteration starts
1689 * from after the fixup
1691 run_start = fixup_start_addr + host_ratio;
1692 } else {
1694 * No discards on this iteration, next loop starts from
1695 * next sent/dirty page
1697 run_start = run_end + 1;
1701 if (do_fixup) {
1702 unsigned long page;
1704 /* Tell the destination to discard this page */
1705 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1706 /* For the unsent_pass we:
1707 * discard partially sent pages
1708 * For the !unsent_pass (dirty) we:
1709 * discard partially dirty pages that were sent
1710 * (any partially sent pages were already discarded
1711 * by the previous unsent_pass)
1713 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1714 host_ratio);
1717 /* Clean up the bitmap */
1718 for (page = fixup_start_addr;
1719 page < fixup_start_addr + host_ratio; page++) {
1720 /* All pages in this host page are now not sent */
1721 set_bit(page, unsentmap);
1724 * Remark them as dirty, updating the count for any pages
1725 * that weren't previously dirty.
1727 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
1731 if (unsent_pass) {
1732 /* Find the next sent page for the next iteration */
1733 run_start = find_next_zero_bit(unsentmap, pages, run_start);
1734 } else {
1735 /* Find the next dirty page for the next iteration */
1736 run_start = find_next_bit(bitmap, pages, run_start);
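/*
 * Worked example (assuming host_ratio == 512, i.e. 2 MiB host pages and
 * 4 KiB target pages): a dirty run starting at target page 1000 has
 * host_offset == 1000 % 512 == 488, so the run is pulled back to page
 * 512; the destination is told to discard that host page (always on the
 * unsent pass, on the dirty pass only if it had been sent), pages
 * 512..1023 are marked unsent and re-dirtied, and the scan resumes at
 * page 1024.
 */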
1742 * postcopy_chunk_hostpages: discard any partially sent host page
1744 * Utility for the outgoing postcopy code.
1746 * Discard any partially sent host-page size chunks, mark any partially
1747 * dirty host-page size chunks as all dirty. In this case the host-page
1748 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1750 * Returns zero on success
1752 * @ms: current migration state
1753 * @block: block we want to work with
1755 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
1757 PostcopyDiscardState *pds =
1758 postcopy_discard_send_init(ms, block->idstr);
1760 /* First pass: Discard all partially sent host pages */
1761 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1763 * Second pass: Ensure that all partially dirty host pages are made
1764 * fully dirty.
1766 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1768 postcopy_discard_send_finish(ms, pds);
1769 return 0;
1773 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1775 * Returns zero on success
1777 * Transmit the set of pages to be discarded after precopy to the target;
1778 * these are pages that:
1779 * a) Have been previously transmitted but are now dirty again
1780 * b) Pages that have never been transmitted, this ensures that
1781 * any pages on the destination that have been mapped by background
1782 * tasks get discarded (transparent huge pages is the specific concern)
1783 * Hopefully this is pretty sparse
1785 * @ms: current migration state
1787 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1789 RAMState *rs = &ram_state;
1790 RAMBlock *block;
1791 int ret;
1793 rcu_read_lock();
1795 /* This should be our last sync, the src is now paused */
1796 migration_bitmap_sync(rs);
1798 /* Easiest way to make sure we don't resume in the middle of a host-page */
1799 rs->last_seen_block = NULL;
1800 rs->last_sent_block = NULL;
1801 rs->last_page = 0;
1803 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1804 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1805 unsigned long *bitmap = block->bmap;
1806 unsigned long *unsentmap = block->unsentmap;
1808 if (!unsentmap) {
1809 /* We don't have a safe way to resize the sentmap, so
1810 * if the bitmap was resized it will be NULL at this
1811 * point.
1813 error_report("migration ram resized during precopy phase");
1814 rcu_read_unlock();
1815 return -EINVAL;
1817 /* Deal with TPS != HPS and huge pages */
1818 ret = postcopy_chunk_hostpages(ms, block);
1819 if (ret) {
1820 rcu_read_unlock();
1821 return ret;
1825 * Update the unsentmap to be unsentmap = unsentmap | dirty
1827 bitmap_or(unsentmap, unsentmap, bitmap, pages);
1828 #ifdef DEBUG_POSTCOPY
1829 ram_debug_dump_bitmap(unsentmap, true, pages);
1830 #endif
1832 trace_ram_postcopy_send_discard_bitmap();
1834 ret = postcopy_each_ram_send_discard(ms);
1835 rcu_read_unlock();
1837 return ret;
1841 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1843 * Returns zero on success
1845 * @rbname: name of the RAMBlock of the request. NULL means the
1846 * same as the last one.
1847 * @start: RAMBlock starting page
1848 * @length: RAMBlock size
1850 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
1852 int ret = -1;
1854 trace_ram_discard_range(rbname, start, length);
1856 rcu_read_lock();
1857 RAMBlock *rb = qemu_ram_block_by_name(rbname);
1859 if (!rb) {
1860 error_report("ram_discard_range: Failed to find block '%s'", rbname);
1861 goto err;
1864 ret = ram_block_discard_range(rb, start, length);
1866 err:
1867 rcu_read_unlock();
1869 return ret;
1872 static int ram_state_init(RAMState *rs)
1874 memset(rs, 0, sizeof(*rs));
1875 qemu_mutex_init(&rs->bitmap_mutex);
1876 qemu_mutex_init(&rs->src_page_req_mutex);
1877 QSIMPLEQ_INIT(&rs->src_page_requests);
1879 if (migrate_use_xbzrle()) {
1880 XBZRLE_cache_lock();
1881 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1882 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1883 TARGET_PAGE_SIZE,
1884 TARGET_PAGE_SIZE);
1885 if (!XBZRLE.cache) {
1886 XBZRLE_cache_unlock();
1887 error_report("Error creating cache");
1888 return -1;
1890 XBZRLE_cache_unlock();
1892 /* We prefer not to abort if there is no memory */
1893 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1894 if (!XBZRLE.encoded_buf) {
1895 error_report("Error allocating encoded_buf");
1896 return -1;
1899 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1900 if (!XBZRLE.current_buf) {
1901 error_report("Error allocating current_buf");
1902 g_free(XBZRLE.encoded_buf);
1903 XBZRLE.encoded_buf = NULL;
1904 return -1;
1908 /* For memory_global_dirty_log_start below. */
1909 qemu_mutex_lock_iothread();
1911 qemu_mutex_lock_ramlist();
1912 rcu_read_lock();
1913 ram_state_reset(rs);
1915 /* Skip setting bitmap if there is no RAM */
1916 if (ram_bytes_total()) {
1917 RAMBlock *block;
1919 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1920 unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
1922 block->bmap = bitmap_new(pages);
1923 bitmap_set(block->bmap, 0, pages);
1924 if (migrate_postcopy_ram()) {
1925 block->unsentmap = bitmap_new(pages);
1926 bitmap_set(block->unsentmap, 0, pages);
1932 * Count the total number of pages used by ram blocks not including any
1933 * gaps due to alignment or unplugs.
1935 rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
1937 memory_global_dirty_log_start();
1938 migration_bitmap_sync(rs);
1939 qemu_mutex_unlock_ramlist();
1940 qemu_mutex_unlock_iothread();
1941 rcu_read_unlock();
1943 return 0;
1947 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1948 * a long-running RCU critical section. When rcu-reclaims in the code
1949 * start to become numerous it will be necessary to reduce the
1950 * granularity of these critical sections.
1954 * ram_save_setup: Setup RAM for migration
1956 * Returns zero to indicate success and negative for error
1958 * @f: QEMUFile where to send the data
1959 * @opaque: RAMState pointer
1961 static int ram_save_setup(QEMUFile *f, void *opaque)
1963 RAMState *rs = opaque;
1964 RAMBlock *block;
1966 /* migration has already setup the bitmap, reuse it. */
1967 if (!migration_in_colo_state()) {
1968 if (ram_state_init(rs) < 0) {
1969 return -1;
1972 rs->f = f;
1974 rcu_read_lock();
1976 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
1978 RAMBLOCK_FOREACH(block) {
1979 qemu_put_byte(f, strlen(block->idstr));
1980 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
1981 qemu_put_be64(f, block->used_length);
1982 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
1983 qemu_put_be64(f, block->page_size);
1987 rcu_read_unlock();
1989 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
1990 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
1992 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
1994 return 0;
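/*
 * Stream written by ram_save_setup() (from the code above):
 *
 *     be64  ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *     per RAMBlock: u8 idstr length, idstr bytes, be64 used_length,
 *                   optionally be64 page_size (postcopy with huge pages)
 *     be64  RAM_SAVE_FLAG_EOS
 */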
1998 * ram_save_iterate: iterative stage for migration
2000 * Returns zero to indicate success and negative for error
2002 * @f: QEMUFile where to send the data
2003 * @opaque: RAMState pointer
2005 static int ram_save_iterate(QEMUFile *f, void *opaque)
2007 RAMState *rs = opaque;
2008 int ret;
2009 int i;
2010 int64_t t0;
2011 int done = 0;
2013 rcu_read_lock();
2014 if (ram_list.version != rs->last_version) {
2015 ram_state_reset(rs);
2018 /* Read version before ram_list.blocks */
2019 smp_rmb();
2021 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2023 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2024 i = 0;
2025 while ((ret = qemu_file_rate_limit(f)) == 0) {
2026 int pages;
2028 pages = ram_find_and_save_block(rs, false);
2029 /* no more pages to send */
2030 if (pages == 0) {
2031 done = 1;
2032 break;
2034 rs->iterations++;
2036 /* we want to check in the 1st loop, just in case it was the 1st time
2037 and we had to sync the dirty bitmap.
2038 qemu_clock_get_ns() is a bit expensive, so we only check every few
2039 iterations
2041 if ((i & 63) == 0) {
2042 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2043 if (t1 > MAX_WAIT) {
2044 trace_ram_save_iterate_big_wait(t1, i);
2045 break;
2048 i++;
2050 flush_compressed_data(rs);
2051 rcu_read_unlock();
2054 * Must occur before EOS (or any QEMUFile operation)
2055 * because of RDMA protocol.
2057 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2059 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2060 rs->bytes_transferred += 8;
2062 ret = qemu_file_get_error(f);
2063 if (ret < 0) {
2064 return ret;
2067 return done;
2071 * ram_save_complete: function called to send the remaining amount of ram
2073 * Returns zero to indicate success
2075 * Called with the iothread lock held
2077 * @f: QEMUFile where to send the data
2078 * @opaque: RAMState pointer
2080 static int ram_save_complete(QEMUFile *f, void *opaque)
2082 RAMState *rs = opaque;
2084 rcu_read_lock();
2086 if (!migration_in_postcopy()) {
2087 migration_bitmap_sync(rs);
2090 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2092 /* try transferring iterative blocks of memory */
2094 /* flush all remaining blocks regardless of rate limiting */
2095 while (true) {
2096 int pages;
2098 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2100 /* no more blocks to send */
2100 if (pages == 0) {
2101 break;
2105 flush_compressed_data(rs);
2106 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2108 rcu_read_unlock();
2110 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2112 return 0;
2115 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2116 uint64_t *non_postcopiable_pending,
2117 uint64_t *postcopiable_pending)
2119 RAMState *rs = opaque;
2120 uint64_t remaining_size;
2122 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
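/* If what is left looks small enough to finish, re-sync the dirty
 * bitmap (under the iothread lock) so the pending figure reported
 * below is up to date rather than stale. */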
2124 if (!migration_in_postcopy() &&
2125 remaining_size < max_size) {
2126 qemu_mutex_lock_iothread();
2127 rcu_read_lock();
2128 migration_bitmap_sync(rs);
2129 rcu_read_unlock();
2130 qemu_mutex_unlock_iothread();
2131 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2134 /* We can do postcopy, and all the data is postcopiable */
2135 *postcopiable_pending += remaining_size;
2138 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2140 unsigned int xh_len;
2141 int xh_flags;
2142 uint8_t *loaded_data;
2144 if (!xbzrle_decoded_buf) {
2145 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2147 loaded_data = xbzrle_decoded_buf;
2149 /* extract RLE header */
2150 xh_flags = qemu_get_byte(f);
2151 xh_len = qemu_get_be16(f);
2153 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2154 error_report("Failed to load XBZRLE page - wrong compression!");
2155 return -1;
2158 if (xh_len > TARGET_PAGE_SIZE) {
2159 error_report("Failed to load XBZRLE page - len overflow!");
2160 return -1;
2162 /* load data and decode */
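/* qemu_get_buffer_in_place() may point loaded_data straight into the
 * QEMUFile's internal buffer rather than copying into our own one. */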
2163 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2165 /* decode RLE */
2166 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2167 TARGET_PAGE_SIZE) == -1) {
2168 error_report("Failed to load XBZRLE page - decode error!");
2169 return -1;
2172 return 0;
2176 * ram_block_from_stream: read a RAMBlock id from the migration stream
2178 * Must be called from within an RCU critical section.
2180 * Returns a pointer from within the RCU-protected ram_list.
2182 * @f: QEMUFile where to read the data from
2183 * @flags: Page flags (mostly to see if it's a continuation of the previous block)
2185 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
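/* The last block seen is cached in a static pointer so that pages
 * flagged RAM_SAVE_FLAG_CONTINUE can skip re-reading the block id. */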
2187 static RAMBlock *block = NULL;
2188 char id[256];
2189 uint8_t len;
2191 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2192 if (!block) {
2193 error_report("Ack, bad migration stream!");
2194 return NULL;
2196 return block;
2199 len = qemu_get_byte(f);
2200 qemu_get_buffer(f, (uint8_t *)id, len);
2201 id[len] = 0;
2203 block = qemu_ram_block_by_name(id);
2204 if (!block) {
2205 error_report("Can't find block %s", id);
2206 return NULL;
2209 return block;
2212 static inline void *host_from_ram_block_offset(RAMBlock *block,
2213 ram_addr_t offset)
2215 if (!offset_in_ramblock(block, offset)) {
2216 return NULL;
2219 return block->host + offset;
2223 * ram_handle_compressed: handle the zero page case
2225 * If a page (or a whole RDMA chunk) has been
2226 * determined to be zero, then zap it.
2228 * @host: host address for the zero page
2229 * @ch: what the page is filled with. We only support zero
2230 * @size: size of the zero page
2232 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2234 if (ch != 0 || !is_zero_range(host, size)) {
2235 memset(host, ch, size);
2239 static void *do_data_decompress(void *opaque)
2241 DecompressParam *param = opaque;
2242 unsigned long pagesize;
2243 uint8_t *des;
2244 int len;
2246 qemu_mutex_lock(&param->mutex);
2247 while (!param->quit) {
2248 if (param->des) {
2249 des = param->des;
2250 len = param->len;
2251 param->des = 0;
2252 qemu_mutex_unlock(&param->mutex);
2254 pagesize = TARGET_PAGE_SIZE;
2255 /* uncompress() may fail in some cases, especially
2256 * when the page was dirtied while being compressed; that's
2257 * not a problem because the dirty page will be retransferred
2258 * and uncompress() won't corrupt the data in other pages.
2260 uncompress((Bytef *)des, &pagesize,
2261 (const Bytef *)param->compbuf, len);
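/* Mark this worker idle again and wake anyone blocked in
 * wait_for_decompress_done() or waiting for a free thread in
 * decompress_data_with_multi_threads(). */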
2263 qemu_mutex_lock(&decomp_done_lock);
2264 param->done = true;
2265 qemu_cond_signal(&decomp_done_cond);
2266 qemu_mutex_unlock(&decomp_done_lock);
2268 qemu_mutex_lock(&param->mutex);
2269 } else {
2270 qemu_cond_wait(&param->cond, &param->mutex);
2273 qemu_mutex_unlock(&param->mutex);
2275 return NULL;
2278 static void wait_for_decompress_done(void)
2280 int idx, thread_count;
2282 if (!migrate_use_compression()) {
2283 return;
2286 thread_count = migrate_decompress_threads();
2287 qemu_mutex_lock(&decomp_done_lock);
2288 for (idx = 0; idx < thread_count; idx++) {
2289 while (!decomp_param[idx].done) {
2290 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2293 qemu_mutex_unlock(&decomp_done_lock);
2296 void migrate_decompress_threads_create(void)
2298 int i, thread_count;
2300 thread_count = migrate_decompress_threads();
2301 decompress_threads = g_new0(QemuThread, thread_count);
2302 decomp_param = g_new0(DecompressParam, thread_count);
2303 qemu_mutex_init(&decomp_done_lock);
2304 qemu_cond_init(&decomp_done_cond);
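/* Every worker starts out idle (done = true), with a per-thread mutex
 * and condition variable used to hand compressed pages over to it. */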
2305 for (i = 0; i < thread_count; i++) {
2306 qemu_mutex_init(&decomp_param[i].mutex);
2307 qemu_cond_init(&decomp_param[i].cond);
2308 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2309 decomp_param[i].done = true;
2310 decomp_param[i].quit = false;
2311 qemu_thread_create(decompress_threads + i, "decompress",
2312 do_data_decompress, decomp_param + i,
2313 QEMU_THREAD_JOINABLE);
2317 void migrate_decompress_threads_join(void)
2319 int i, thread_count;
2321 thread_count = migrate_decompress_threads();
2322 for (i = 0; i < thread_count; i++) {
2323 qemu_mutex_lock(&decomp_param[i].mutex);
2324 decomp_param[i].quit = true;
2325 qemu_cond_signal(&decomp_param[i].cond);
2326 qemu_mutex_unlock(&decomp_param[i].mutex);
2328 for (i = 0; i < thread_count; i++) {
2329 qemu_thread_join(decompress_threads + i);
2330 qemu_mutex_destroy(&decomp_param[i].mutex);
2331 qemu_cond_destroy(&decomp_param[i].cond);
2332 g_free(decomp_param[i].compbuf);
2334 g_free(decompress_threads);
2335 g_free(decomp_param);
2336 decompress_threads = NULL;
2337 decomp_param = NULL;
2340 static void decompress_data_with_multi_threads(QEMUFile *f,
2341 void *host, int len)
2343 int idx, thread_count;
2345 thread_count = migrate_decompress_threads();
2346 qemu_mutex_lock(&decomp_done_lock);
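/* Look for an idle worker; if none is free, sleep on decomp_done_cond
 * until one of them finishes, then retry the scan. */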
2347 while (true) {
2348 for (idx = 0; idx < thread_count; idx++) {
2349 if (decomp_param[idx].done) {
2350 decomp_param[idx].done = false;
2351 qemu_mutex_lock(&decomp_param[idx].mutex);
2352 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2353 decomp_param[idx].des = host;
2354 decomp_param[idx].len = len;
2355 qemu_cond_signal(&decomp_param[idx].cond);
2356 qemu_mutex_unlock(&decomp_param[idx].mutex);
2357 break;
2360 if (idx < thread_count) {
2361 break;
2362 } else {
2363 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2366 qemu_mutex_unlock(&decomp_done_lock);
2370 * ram_postcopy_incoming_init: allocate postcopy data structures
2372 * Returns 0 for success and negative on error
2374 * @mis: current migration incoming state
2376 * Allocate the data structures etc needed by incoming migration with
2377 * postcopy-ram. postcopy-ram's similarly named
2378 * postcopy_ram_incoming_init does the work.
2380 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2382 unsigned long ram_pages = last_ram_page();
2384 return postcopy_ram_incoming_init(mis, ram_pages);
2388 * ram_load_postcopy: load a page in postcopy case
2390 * Returns 0 for success or -errno in case of error
2392 * Called in postcopy mode by ram_load().
2393 * rcu_read_lock is taken prior to this being called.
2395 * @f: QEMUFile to read the data from
2397 static int ram_load_postcopy(QEMUFile *f)
2399 int flags = 0, ret = 0;
2400 bool place_needed = false;
2401 bool matching_page_sizes = false;
2402 MigrationIncomingState *mis = migration_incoming_get_current();
2403 /* Temporary page that is later 'placed' */
2404 void *postcopy_host_page = postcopy_get_tmp_page(mis);
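/* last_host remembers the previous target page so we can verify that
 * the source sends the target pages of a host page consecutively. */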
2405 void *last_host = NULL;
2406 bool all_zero = false;
2408 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2409 ram_addr_t addr;
2410 void *host = NULL;
2411 void *page_buffer = NULL;
2412 void *place_source = NULL;
2413 RAMBlock *block = NULL;
2414 uint8_t ch;
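/* Each record starts with a be64 whose low bits (below
 * TARGET_PAGE_MASK) carry the RAM_SAVE_FLAG_* bits and whose upper
 * bits give the page offset within its RAMBlock. */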
2416 addr = qemu_get_be64(f);
2417 flags = addr & ~TARGET_PAGE_MASK;
2418 addr &= TARGET_PAGE_MASK;
2420 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2421 place_needed = false;
2422 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
2423 block = ram_block_from_stream(f, flags);
2425 host = host_from_ram_block_offset(block, addr);
2426 if (!host) {
2427 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2428 ret = -EINVAL;
2429 break;
2431 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2433 * Postcopy requires that we place whole host pages atomically;
2434 * these may be huge pages for RAMBlocks that are backed by
2435 * hugetlbfs.
2436 * To make it atomic, the data is read into a temporary page
2437 * that's moved into place later.
2438 * The migration protocol uses, possibly smaller, target pages;
2439 * however, the source ensures it always sends all the components
2440 * of a host page in order.
2442 page_buffer = postcopy_host_page +
2443 ((uintptr_t)host & (block->page_size - 1));
2444 /* First target page of a host page: if they all turn out zero we can optimise the place */
2445 if (!((uintptr_t)host & (block->page_size - 1))) {
2446 all_zero = true;
2447 } else {
2448 /* not the first target page within the host page */
2449 if (host != (last_host + TARGET_PAGE_SIZE)) {
2450 error_report("Non-sequential target page %p/%p",
2451 host, last_host);
2452 ret = -EINVAL;
2453 break;
2459 * If it's the last part of a host page then we place the host
2460 * page
2462 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2463 (block->page_size - 1)) == 0;
2464 place_source = postcopy_host_page;
2466 last_host = host;
2468 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2469 case RAM_SAVE_FLAG_ZERO:
2470 ch = qemu_get_byte(f);
2471 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2472 if (ch) {
2473 all_zero = false;
2475 break;
2477 case RAM_SAVE_FLAG_PAGE:
2478 all_zero = false;
2479 if (!place_needed || !matching_page_sizes) {
2480 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2481 } else {
2482 /* Avoid the extra copy out of the QEMUFile buffer; postcopy
2483 * is going to copy the page into place later anyway. We can
2484 * only do this when the read is done in one go (matching page sizes)
2486 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2487 TARGET_PAGE_SIZE);
2489 break;
2490 case RAM_SAVE_FLAG_EOS:
2491 /* normal exit */
2492 break;
2493 default:
2494 error_report("Unknown combination of migration flags: %#x"
2495 " (postcopy mode)", flags);
2496 ret = -EINVAL;
2499 if (place_needed) {
2500 /* We only get here at the last target page of the host page */
2501 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
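/* 'host' points at the last target page of the host page here, so
 * step back to the start of the host page before placing it. */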
2503 if (all_zero) {
2504 ret = postcopy_place_page_zero(mis, place_dest,
2505 block->page_size);
2506 } else {
2507 ret = postcopy_place_page(mis, place_dest,
2508 place_source, block->page_size);
2511 if (!ret) {
2512 ret = qemu_file_get_error(f);
2516 return ret;
2519 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2521 int flags = 0, ret = 0;
2522 static uint64_t seq_iter;
2523 int len = 0;
2525 * If the system is running in postcopy mode, page inserts into host memory
2526 * must be atomic
2528 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2529 /* ADVISE comes earlier; it shows the source has the postcopy capability enabled */
2530 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2532 seq_iter++;
2534 if (version_id != 4) {
2535 ret = -EINVAL;
2538 /* This RCU critical section can be very long running.
2539 * When RCU reclaims in the code start to become numerous,
2540 * it will be necessary to reduce the granularity of this
2541 * critical section.
2543 rcu_read_lock();
2545 if (postcopy_running) {
2546 ret = ram_load_postcopy(f);
2549 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2550 ram_addr_t addr, total_ram_bytes;
2551 void *host = NULL;
2552 uint8_t ch;
2554 addr = qemu_get_be64(f);
2555 flags = addr & ~TARGET_PAGE_MASK;
2556 addr &= TARGET_PAGE_MASK;
2558 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
2559 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2560 RAMBlock *block = ram_block_from_stream(f, flags);
2562 host = host_from_ram_block_offset(block, addr);
2563 if (!host) {
2564 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2565 ret = -EINVAL;
2566 break;
2568 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
2571 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2572 case RAM_SAVE_FLAG_MEM_SIZE:
2573 /* Synchronize RAM block list */
2574 total_ram_bytes = addr;
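/* addr carries the total RAM size here; the per-block records that
 * follow mirror what ram_save_setup() wrote: id length, id string,
 * used length and, when advertised, the block's page size. */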
2575 while (!ret && total_ram_bytes) {
2576 RAMBlock *block;
2577 char id[256];
2578 ram_addr_t length;
2580 len = qemu_get_byte(f);
2581 qemu_get_buffer(f, (uint8_t *)id, len);
2582 id[len] = 0;
2583 length = qemu_get_be64(f);
2585 block = qemu_ram_block_by_name(id);
2586 if (block) {
2587 if (length != block->used_length) {
2588 Error *local_err = NULL;
2590 ret = qemu_ram_resize(block, length,
2591 &local_err);
2592 if (local_err) {
2593 error_report_err(local_err);
2596 /* For postcopy we need to check that hugepage sizes match */
2597 if (postcopy_advised &&
2598 block->page_size != qemu_host_page_size) {
2599 uint64_t remote_page_size = qemu_get_be64(f);
2600 if (remote_page_size != block->page_size) {
2601 error_report("Mismatched RAM page size %s "
2602 "(local) %zd != %" PRId64,
2603 id, block->page_size,
2604 remote_page_size);
2605 ret = -EINVAL;
2608 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2609 block->idstr);
2610 } else {
2611 error_report("Unknown ramblock \"%s\", cannot "
2612 "accept migration", id);
2613 ret = -EINVAL;
2616 total_ram_bytes -= length;
2618 break;
2620 case RAM_SAVE_FLAG_ZERO:
2621 ch = qemu_get_byte(f);
2622 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2623 break;
2625 case RAM_SAVE_FLAG_PAGE:
2626 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2627 break;
2629 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2630 len = qemu_get_be32(f);
2631 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2632 error_report("Invalid compressed data length: %d", len);
2633 ret = -EINVAL;
2634 break;
2636 decompress_data_with_multi_threads(f, host, len);
2637 break;
2639 case RAM_SAVE_FLAG_XBZRLE:
2640 if (load_xbzrle(f, addr, host) < 0) {
2641 error_report("Failed to decompress XBZRLE page at "
2642 RAM_ADDR_FMT, addr);
2643 ret = -EINVAL;
2644 break;
2646 break;
2647 case RAM_SAVE_FLAG_EOS:
2648 /* normal exit */
2649 break;
2650 default:
2651 if (flags & RAM_SAVE_FLAG_HOOK) {
2652 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2653 } else {
2654 error_report("Unknown combination of migration flags: %#x",
2655 flags);
2656 ret = -EINVAL;
2659 if (!ret) {
2660 ret = qemu_file_get_error(f);
2664 wait_for_decompress_done();
2665 rcu_read_unlock();
2666 trace_ram_load_complete(ret, seq_iter);
2667 return ret;
2670 static SaveVMHandlers savevm_ram_handlers = {
2671 .save_live_setup = ram_save_setup,
2672 .save_live_iterate = ram_save_iterate,
2673 .save_live_complete_postcopy = ram_save_complete,
2674 .save_live_complete_precopy = ram_save_complete,
2675 .save_live_pending = ram_save_pending,
2676 .load_state = ram_load,
2677 .cleanup = ram_migration_cleanup,
2680 void ram_mig_init(void)
2682 qemu_mutex_init(&XBZRLE.lock);
2683 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);