 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *  Juan Quintela <quintela@redhat.com>
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "migration.h"
#include "migration/register.h"
#include "migration/misc.h"
#include "qemu-file.h"
#include "postcopy-ram.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qapi/qapi-events-migration.h"
#include "qapi/qmp/qerror.h"
#include "exec/ram_addr.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
#include "migration/block.h"
/***********************************************************/
/* ram save/restore */
/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char. We switched
 * it to only search for the zero value. And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */
#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_ZERO     0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h, start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
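
/*
 * Each page record on the wire starts with a big-endian 64-bit word written
 * by save_page_header() below; it packs the page offset within its RAMBlock
 * together with the flags above in the low-order bits, which is why every
 * flag has to stay below the target page size.
 */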
static inline bool is_zero_range(uint8_t *p, uint64_t size)
    return buffer_is_zero(p, size);

XBZRLECacheStats xbzrle_counters;
/* struct contains XBZRLE cache and a static page
   used by the compression */
    /* buffer used for XBZRLE encoding */
    /* buffer for storing page content */
    /* Cache for XBZRLE, Protected by lock. */
    /* it will store a page full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
static void XBZRLE_cache_lock(void)
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);

static void XBZRLE_cache_unlock(void)
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
 * xbzrle_cache_resize: resize the xbzrle cache
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 * Returns 0 for success or -1 for error
 * @new_size: new cache size
 * @errp: set *errp if the check failed, with reason
int xbzrle_cache_resize(int64_t new_size, Error **errp)
    PageCache *new_cache;

    /* Check for truncation */
    if (new_size != (size_t)new_size) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
                   "exceeding address space");

    if (new_size == migrate_xbzrle_cache_size()) {

    if (XBZRLE.cache != NULL) {
        new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;

    XBZRLE_cache_unlock();
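
/*
 * Note that the replacement cache is created with cache_init() before the
 * old one is torn down, so a failed allocation can leave the existing
 * XBZRLE cache in place.
 */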
static void ramblock_recv_map_init(void)
    RAMBLOCK_FOREACH(rb) {
        assert(!rb->receivedmap);
        rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
    return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
    return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
    set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
    bitmap_set_atomic(rb->receivedmap,
                      ramblock_recv_bitmap_offset(host_addr, rb),
 * An outstanding page request, on the source, having been received
struct RAMSrcPageRequest {
    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
/* State of RAM for migration */
    /* QEMUFile used for this migration */
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;

    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Iterations since start */
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* protects modification of the bitmap */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
typedef struct RAMState RAMState;
static RAMState *ram_state;

uint64_t ram_bytes_remaining(void)
    return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :

MigrationStats ram_counters;
/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    /* Current page to search from */
    /* Set once we wrap around */
typedef struct PageSearchStatus PageSearchStatus;
struct CompressParam {
    /* internally used fields */
typedef struct CompressParam CompressParam;

struct DecompressParam {
typedef struct DecompressParam DecompressParam;
static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf);
static void *do_data_compress(void *opaque)
    CompressParam *param = opaque;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
            block = param->block;
            offset = param->offset;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, &param->stream, block, offset,

            qemu_mutex_lock(&comp_done_lock);
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
            qemu_cond_wait(&param->cond, &param->mutex);
    qemu_mutex_unlock(&param->mutex);
static inline void terminate_compression_threads(void)
    int idx, thread_count;

    thread_count = migrate_compress_threads();
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
static void compress_threads_save_cleanup(void)
    if (!migrate_use_compression()) {

    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
        if (!comp_param[i].file) {

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;

    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    compress_threads = NULL;
static int compress_threads_save_setup(void)
    if (!migrate_use_compression()) {

    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);

    compress_threads_save_cleanup();
struct MultiFDSendParams {
typedef struct MultiFDSendParams MultiFDSendParams;

    MultiFDSendParams *params;
    /* number of created threads */
} *multifd_send_state;
static void terminate_multifd_send_threads(Error *errp)
    for (i = 0; i < multifd_send_state->count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_lock(&p->mutex);
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
int multifd_save_cleanup(Error **errp)
    if (!migrate_use_multifd()) {

    terminate_multifd_send_threads(NULL);
    for (i = 0; i < multifd_send_state->count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_thread_join(&p->thread);
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);

    g_free(multifd_send_state->params);
    multifd_send_state->params = NULL;
    g_free(multifd_send_state);
    multifd_send_state = NULL;
static void *multifd_send_thread(void *opaque)
    MultiFDSendParams *p = opaque;

        qemu_mutex_lock(&p->mutex);
            qemu_mutex_unlock(&p->mutex);
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
int multifd_save_setup(void)
    if (!migrate_use_multifd()) {

    thread_count = migrate_multifd_channels();
    multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
    multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
    multifd_send_state->count = 0;
    for (i = 0; i < thread_count; i++) {
        MultiFDSendParams *p = &multifd_send_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->name = g_strdup_printf("multifdsend_%d", i);
        qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                           QEMU_THREAD_JOINABLE);
        multifd_send_state->count++;
struct MultiFDRecvParams {
typedef struct MultiFDRecvParams MultiFDRecvParams;

    MultiFDRecvParams *params;
    /* number of created threads */
} *multifd_recv_state;
static void terminate_multifd_recv_threads(Error *errp)
    for (i = 0; i < multifd_recv_state->count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_lock(&p->mutex);
        qemu_sem_post(&p->sem);
        qemu_mutex_unlock(&p->mutex);
int multifd_load_cleanup(Error **errp)
    if (!migrate_use_multifd()) {

    terminate_multifd_recv_threads(NULL);
    for (i = 0; i < multifd_recv_state->count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_thread_join(&p->thread);
        qemu_mutex_destroy(&p->mutex);
        qemu_sem_destroy(&p->sem);

    g_free(multifd_recv_state->params);
    multifd_recv_state->params = NULL;
    g_free(multifd_recv_state);
    multifd_recv_state = NULL;
static void *multifd_recv_thread(void *opaque)
    MultiFDRecvParams *p = opaque;

        qemu_mutex_lock(&p->mutex);
            qemu_mutex_unlock(&p->mutex);
        qemu_mutex_unlock(&p->mutex);
        qemu_sem_wait(&p->sem);
int multifd_load_setup(void)
    if (!migrate_use_multifd()) {

    thread_count = migrate_multifd_channels();
    multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
    multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
    multifd_recv_state->count = 0;
    for (i = 0; i < thread_count; i++) {
        MultiFDRecvParams *p = &multifd_recv_state->params[i];

        qemu_mutex_init(&p->mutex);
        qemu_sem_init(&p->sem, 0);
        p->name = g_strdup_printf("multifdrecv_%d", i);
        qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                           QEMU_THREAD_JOINABLE);
        multifd_recv_state->count++;
 * save_page_header: write page header to wire
 * If this is the 1st block, it also writes the block identification
 * Returns the number of bytes written
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    qemu_put_be64(f, offset);

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        rs->last_sent_block = block;
 * mig_throttle_guest_down: throttle down the guest
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
static void mig_throttle_guest_down(void)
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_icrement = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
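
/*
 * pct_initial and pct_icrement above are taken from the cpu-throttle-initial
 * and cpu-throttle-increment migration parameters, so the throttling curve
 * can be tuned at runtime.
 */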
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 * @rs: current RAM state
 * @current_addr: address for the zero page
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
                 ram_counters.dirty_sync_count);

#define ENCODING_FLAG_XBZRLE 0x1
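
/*
 * An XBZRLE page on the wire is the normal page header with
 * RAM_SAVE_FLAG_XBZRLE set, followed by one byte holding
 * ENCODING_FLAG_XBZRLE, a big-endian 16-bit encoded length and the encoded
 * bytes themselves; save_xbzrle_page() below accounts for the extra
 * "encoded_len + 1 + 2" bytes accordingly.
 */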
 * save_xbzrle_page: compress and send current page
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 * @rs: current RAM state
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,

    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        /* update data in the cache */
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;

    /* we need to update the data in the cache, in order to get the same data */
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    xbzrle_counters.pages++;
    xbzrle_counters.bytes += bytes_xbzrle;
    ram_counters.transferred += bytes_xbzrle;
 * migration_bitmap_find_dirty: find the next dirty page from start
 * Called with rcu_read_lock() to protect migration_bitmap
 * Returns the byte offset within memory region of the start of a dirty page
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: page where we start the search
unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
    unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
    unsigned long *bitmap = rb->bmap;

    if (rs->ram_bulk_stage && start > 0) {
        next = find_next_bit(bitmap, size, start);
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
    ret = test_and_clear_bit(page, rb->bmap);
        rs->migration_dirty_pages--;
static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
                                        ram_addr_t start, ram_addr_t length)
    rs->migration_dirty_pages +=
        cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
                                              &rs->num_dirty_pages_period);
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
uint64_t ram_pagesize_summary(void)
    uint64_t summary = 0;

    RAMBLOCK_FOREACH(block) {
        summary |= block->page_size;
static void migration_bitmap_sync(RAMState *rs)
    uint64_t bytes_xfer_now;

    ram_counters.dirty_sync_count++;

    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    RAMBLOCK_FOREACH(block) {
        migration_bitmap_sync_range(rs, block, 0, block->used_length);
    qemu_mutex_unlock(&rs->bitmap_mutex);

    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        /* calculate period counters */
        ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
            / (end_time - rs->time_last_bitmap_sync);
        bytes_xfer_now = ram_counters.transferred;

        /* During block migration the auto-converge logic incorrectly detects
         * that ram migration makes no progress. Avoid this by disabling the
         * throttling logic during the bulk phase of block migration. */
        if (migrate_auto_converge() && !blk_mig_bulk_active()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes are 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */
            if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
                   (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
                (++rs->dirty_rate_high_cnt >= 2)) {
                    trace_migration_throttle();
                    rs->dirty_rate_high_cnt = 0;
                    mig_throttle_guest_down();

        if (migrate_use_xbzrle()) {
            if (rs->iterations_prev != rs->iterations) {
                xbzrle_counters.cache_miss_rate =
                   (double)(xbzrle_counters.cache_miss -
                            rs->xbzrle_cache_miss_prev) /
                   (rs->iterations - rs->iterations_prev);
            rs->iterations_prev = rs->iterations;
            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = bytes_xfer_now;

    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
 * save_zero_page: send the zero page to the stream
 * Returns the number of pages written.
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
    uint8_t *p = block->host + offset;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        ram_counters.duplicate++;
        ram_counters.transferred +=
            save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
        qemu_put_byte(rs->f, 0);
        ram_counters.transferred += 1;
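
        /*
         * A zero page therefore costs only its page header plus a single
         * zero byte on the wire, which is what the "+= 1" above accounts for.
         */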
static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
    if (!migrate_release_ram() || !migration_in_postcopy()) {

    ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
 * @pages: the number of pages written by the control path,
 *       > 0 - number of pages written
 * Return true if the page has been saved, otherwise false is returned.
static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
    uint64_t bytes_xmit = 0;

    ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
    if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {

        ram_counters.transferred += bytes_xmit;

    if (ret == RAM_SAVE_CONTROL_DELAYED) {

    if (bytes_xmit > 0) {
        ram_counters.normal++;
    } else if (bytes_xmit == 0) {
        ram_counters.duplicate++;
 * directly send the page to the stream
 * Returns the number of pages written.
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @buf: the page to be sent
 * @async: send the page asynchronously
static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
                            uint8_t *buf, bool async)
    ram_counters.transferred += save_page_header(rs, rs->f, block,
                                                 offset | RAM_SAVE_FLAG_PAGE);
        qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
                              migrate_release_ram() &
                              migration_in_postcopy());
        qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
    ram_counters.transferred += TARGET_PAGE_SIZE;
    ram_counters.normal++;
 * ram_save_page: send the given page to the stream
 * Returns the number of pages written.
 *          >=0 - Number of pages written - this might legally be 0
 *                if xbzrle noticed the page was the same.
 * @rs: current RAM state
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
    bool send_async = true;
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
    ram_addr_t current_addr = block->offset + offset;

    p = block->host + offset;
    trace_ram_save_page(block->idstr, (uint64_t)offset, p);

    XBZRLE_cache_lock();
    if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
        migrate_use_xbzrle()) {
        pages = save_xbzrle_page(rs, &p, current_addr, block,
                                 offset, last_stage);
            /* Can't send this cached data async, since the cache page
             * might get updated before it gets to the wire
             */

    /* XBZRLE overflow or normal page */
        pages = save_normal_page(rs, block, offset, p, send_async);

    XBZRLE_cache_unlock();
static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                ram_addr_t offset, uint8_t *source_buf)
    RAMState *rs = ram_state;
    int bytes_sent, blen;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);

    bytes_sent = save_page_header(rs, f, block, offset |
                                  RAM_SAVE_FLAG_COMPRESS_PAGE);

     * copy it to an internal buffer to avoid it being modified by the VM
     * so that we can catch errors during compression
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
        qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
        error_report("compressed data failed!");
        ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
static void flush_compressed_data(RAMState *rs)
    int idx, len, thread_count;

    if (!migrate_use_compression()) {

    thread_count = migrate_compress_threads();

    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
    qemu_mutex_unlock(&comp_done_lock);

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            ram_counters.transferred += len;
        qemu_mutex_unlock(&comp_param[idx].mutex);
static inline void set_compress_params(CompressParam *param, RAMBlock *block,
    param->block = block;
    param->offset = offset;
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
    int idx, thread_count, bytes_xmit = -1, pages = -1;

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);

        for (idx = 0; idx < thread_count; idx++) {
            if (comp_param[idx].done) {
                comp_param[idx].done = false;
                bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
                qemu_mutex_lock(&comp_param[idx].mutex);
                set_compress_params(&comp_param[idx], block, offset);
                qemu_cond_signal(&comp_param[idx].cond);
                qemu_mutex_unlock(&comp_param[idx].mutex);
                ram_counters.normal++;
                ram_counters.transferred += bytes_xmit;
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);

    qemu_mutex_unlock(&comp_done_lock);
 * find_dirty_block: find the next dirty page and update any state
 * associated with the search process.
 * Returns if a page is found
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
 * @again: set to false if the search has scanned the whole of RAM
static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
    if (pss->complete_round && pss->block == rs->last_seen_block &&
        pss->page >= rs->last_page) {
         * We've been once around the RAM and haven't found anything.
    if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
        /* Didn't find anything in this RAM Block */
        pss->block = QLIST_NEXT_RCU(pss->block, next);
            /* Hit the end of the list */
            pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
            /* Flag that we've looped */
            pss->complete_round = true;
            rs->ram_bulk_stage = false;
            if (migrate_use_xbzrle()) {
                /* If xbzrle is on, stop using the data compression at this
                 * point. In theory, xbzrle can do better than compression.
                 */
                flush_compressed_data(rs);
        /* Didn't find anything this time, but try again on the new block */
        /* Can go around again, but... */
        /* We've found something so probably don't need to */
 * unqueue_page: gets a page off the queue
 * Helper for 'get_queued_page' - gets a page off the queue
 * Returns the block of the page (or NULL if none available)
 * @rs: current RAM state
 * @offset: used to return the offset within the RAMBlock
static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
    RAMBlock *block = NULL;

    qemu_mutex_lock(&rs->src_page_req_mutex);
    if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
        struct RAMSrcPageRequest *entry =
            QSIMPLEQ_FIRST(&rs->src_page_requests);
        *offset = entry->offset;

        if (entry->len > TARGET_PAGE_SIZE) {
            entry->len -= TARGET_PAGE_SIZE;
            entry->offset += TARGET_PAGE_SIZE;
            memory_region_unref(block->mr);
            QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
    qemu_mutex_unlock(&rs->src_page_req_mutex);
 * get_queued_page: unqueue a page from the postcopy requests
 * Skips pages that are already sent (!dirty)
 * Returns if a queued page is found
 * @rs: current RAM state
 * @pss: data about the state of the current dirty page scan
static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
        block = unqueue_page(rs, &offset);
             * We're sending this page, and since it's postcopy nothing else
             * will dirty it, and we must make sure it doesn't get sent again
             * even if this queue request was received after the background
             * search already sent it.
            page = offset >> TARGET_PAGE_BITS;
            dirty = test_bit(page, block->bmap);
                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
                                                page, test_bit(page, block->unsentmap));
                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
    } while (block && !dirty);

         * As soon as we start servicing pages out of order, then we have
         * to kill the bulk stage, since the bulk stage assumes
         * in (migration_bitmap_find_and_reset_dirty) that every page is
         * dirty, that's no longer true.
        rs->ram_bulk_stage = false;

         * We want the background search to continue from the queued page
         * since the guest is likely to want other pages near to the page
         * it just requested.
        pss->page = offset >> TARGET_PAGE_BITS;
 * migration_page_queue_free: drop any remaining pages in the ram
 * It should be empty at the end anyway, but in error cases there may
 * be some left. In case there is any page left, we drop it.
static void migration_page_queue_free(RAMState *rs)
    struct RAMSrcPageRequest *mspr, *next_mspr;
    /* This queue generally should be empty - but in the case of a failed
     * migration might have some droppings in.
     */
    QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
        memory_region_unref(mspr->rb->mr);
        QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
 * ram_save_queue_pages: queue the page for transmission
 * A request from postcopy destination for example.
 * Returns zero on success or negative on error
 * @rbname: Name of the RAMBlock of the request. NULL means the
 *          same as the last one.
 * @start: starting address from the start of the RAMBlock
 * @len: length (in bytes) to send
int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
    RAMState *rs = ram_state;

    ram_counters.postcopy_requests++;
        /* Reuse last RAMBlock */
        ramblock = rs->last_req_rb;
             * Shouldn't happen, we can't reuse the last RAMBlock if
             * it's the 1st request.
            error_report("ram_save_queue_pages no previous block");
        ramblock = qemu_ram_block_by_name(rbname);
            /* We shouldn't be asked for a non-existent RAMBlock */
            error_report("ram_save_queue_pages no block '%s'", rbname);
        rs->last_req_rb = ramblock;
    trace_ram_save_queue_pages(ramblock->idstr, start, len);
    if (start + len > ramblock->used_length) {
        error_report("%s request overrun start=" RAM_ADDR_FMT " len="
                     RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
                     __func__, start, len, ramblock->used_length);

    struct RAMSrcPageRequest *new_entry =
        g_malloc0(sizeof(struct RAMSrcPageRequest));
    new_entry->rb = ramblock;
    new_entry->offset = start;
    new_entry->len = len;

    memory_region_ref(ramblock->mr);
    qemu_mutex_lock(&rs->src_page_req_mutex);
    QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
    qemu_mutex_unlock(&rs->src_page_req_mutex);
static bool save_page_use_compression(RAMState *rs)
    if (!migrate_use_compression()) {

     * If xbzrle is on, stop using the data compression after first
     * round of migration even if compression is enabled. In theory,
     * xbzrle can do better than compression.
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
 * ram_save_target_page: save one target page
 * Returns the number of pages written
 * @rs: current RAM state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
    RAMBlock *block = pss->block;
    ram_addr_t offset = pss->page << TARGET_PAGE_BITS;

    if (control_save_page(rs, block, offset, &res)) {

     * When starting the process of a new block, the first page of
     * the block should be sent out before other pages in the same
     * block, and all the pages in last block should have been sent
     * out, keeping this order is important, because the 'cont' flag
     * is used to avoid resending the block name.
    if (block != rs->last_sent_block && save_page_use_compression(rs)) {
        flush_compressed_data(rs);

    res = save_zero_page(rs, block, offset);
        /* Must let xbzrle know, otherwise a previous (now 0'd) cached
         * page would be stale
         */
        if (!save_page_use_compression(rs)) {
            XBZRLE_cache_lock();
            xbzrle_cache_zero_page(rs, block->offset + offset);
            XBZRLE_cache_unlock();
        ram_release_pages(block->idstr, offset, res);

     * Make sure the first page is sent out before other pages.
     * we post it as normal page as compression will take much
    if (block == rs->last_sent_block && save_page_use_compression(rs)) {
        res = compress_page_with_multi_thread(rs, block, offset);

    return ram_save_page(rs, pss, last_stage);
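
/*
 * To summarise the flow above: a target page is first offered to the control
 * path (e.g. RDMA) via control_save_page(), then tried as a zero page, then
 * handed to the multi-threaded compressor when compression applies to this
 * block, and finally sent as an XBZRLE or normal page by ram_save_page().
 */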
 * ram_save_host_page: save a whole host page
 * Starting at *offset send pages up to the end of the current host
 * page. It's valid for the initial offset to point into the middle of
 * a host page in which case the remainder of the host page is sent.
 * Only dirty target pages are sent. Note that the host page size may
 * be a huge page for this block.
 * The saving stops at the boundary of the used_length of the block
 * if the RAMBlock isn't a multiple of the host page size.
 * Returns the number of pages written or negative on error
 * @rs: current RAM state
 * @ms: current migration state
 * @pss: data about the page we want to send
 * @last_stage: if we are at the completion stage
static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
    int tmppages, pages = 0;
    size_t pagesize_bits =
        qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;

        /* Check if the page is dirty and if it is, send it */
        if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {

        tmppages = ram_save_target_page(rs, pss, last_stage);

        if (pss->block->unsentmap) {
            clear_bit(pss->page, pss->block->unsentmap);

    } while ((pss->page & (pagesize_bits - 1)) &&
             offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));

    /* The offset we leave with is the last one we looked at */
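    /*
     * The do/while condition above keeps sending consecutive target pages
     * until pss->page crosses a host-page boundary
     * ((pss->page & (pagesize_bits - 1)) becomes zero) or the offset runs
     * past the RAMBlock's used_length.
     */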
 * ram_find_and_save_block: finds a dirty page and sends it to f
 * Called within an RCU critical section.
 * Returns the number of pages written where zero means no dirty pages
 * @rs: current RAM state
 * @last_stage: if we are at the completion stage
 * On systems where host-page-size > target-page-size it will send all the
 * pages in a host page that are dirty.
static int ram_find_and_save_block(RAMState *rs, bool last_stage)
    PageSearchStatus pss;

    /* No dirty page as there is zero RAM */
    if (!ram_bytes_total()) {

    pss.block = rs->last_seen_block;
    pss.page = rs->last_page;
    pss.complete_round = false;
        pss.block = QLIST_FIRST_RCU(&ram_list.blocks);

        found = get_queued_page(rs, &pss);
            /* priority queue empty, so just search for something dirty */
            found = find_dirty_block(rs, &pss, &again);
            pages = ram_save_host_page(rs, &pss, last_stage);
    } while (!pages && again);

    rs->last_seen_block = pss.block;
    rs->last_page = pss.page;
void acct_update_position(QEMUFile *f, size_t size, bool zero)
    uint64_t pages = size / TARGET_PAGE_SIZE;

        ram_counters.duplicate += pages;
        ram_counters.normal += pages;
        ram_counters.transferred += size;
        qemu_update_position(f, size);
uint64_t ram_bytes_total(void)
    RAMBLOCK_FOREACH(block) {
        total += block->used_length;
static void xbzrle_load_setup(void)
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);

static void xbzrle_load_cleanup(void)
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
static void ram_state_cleanup(RAMState **rsp)
    migration_page_queue_free(*rsp);
    qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
    qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
static void xbzrle_cleanup(void)
    XBZRLE_cache_lock();
    cache_fini(XBZRLE.cache);
    g_free(XBZRLE.encoded_buf);
    g_free(XBZRLE.current_buf);
    g_free(XBZRLE.zero_target_page);
    XBZRLE.cache = NULL;
    XBZRLE.encoded_buf = NULL;
    XBZRLE.current_buf = NULL;
    XBZRLE.zero_target_page = NULL;
    XBZRLE_cache_unlock();
static void ram_save_cleanup(void *opaque)
    RAMState **rsp = opaque;

    /* The caller must hold the iothread lock or be in a bottom half, so there
     * is no writing race against this migration_bitmap
     */
    memory_global_dirty_log_stop();

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        g_free(block->bmap);
        g_free(block->unsentmap);
        block->unsentmap = NULL;

    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
static void ram_state_reset(RAMState *rs)
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_version = ram_list.version;
    rs->ram_bulk_stage = true;
#define MAX_WAIT 50 /* ms, half buffered_file limit */
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 * If 'todump' is null the migration bitmap is dumped.
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
    int64_t linelen = 128;

    for (cur = 0; cur < pages; cur += linelen) {
         * Last line; catch the case where the line length
         * is longer than remaining ram
        if (cur + linelen > pages) {
            linelen = pages - cur;
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        linebuf[curb] = '\0';
        fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
/* **** functions for postcopy ***** */

void ram_postcopy_migrated_memory_release(MigrationState *ms)
    struct RAMBlock *block;

    RAMBLOCK_FOREACH(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        while (run_start < range) {
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
                              (run_end - run_start) << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
 * postcopy_send_discard_bm_ram: discard a RAMBlock
 * Returns zero on success
 * Callback from postcopy_each_ram_send_discard for each RAMBlock
 * Note: At this point the 'unsentmap' is the processed bitmap combined
 *       with the dirtymap; so a '1' means it's either dirty or unsent.
 * @ms: current migration state
 * @pds: state for postcopy
 * @start: RAMBlock starting page
 * @length: RAMBlock size
static int postcopy_send_discard_bm_ram(MigrationState *ms,
                                        PostcopyDiscardState *pds,
    unsigned long end = block->used_length >> TARGET_PAGE_BITS;
    unsigned long current;
    unsigned long *unsentmap = block->unsentmap;

    for (current = 0; current < end; ) {
        unsigned long one = find_next_bit(unsentmap, end, current);
            unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
            unsigned long discard_length;

                discard_length = end - one;
                discard_length = zero - one;
            if (discard_length) {
                postcopy_discard_send_range(ms, pds, one, discard_length);
            current = one + discard_length;
 * postcopy_each_ram_send_discard: discard all RAMBlocks
 * Returns 0 for success or negative for error
 * Utility for the outgoing postcopy code.
 *   Calls postcopy_send_discard_bm_ram for each RAMBlock
 *   passing it bitmap indexes and name.
 * (qemu_ram_foreach_block ends up passing unscaled lengths
 *  which would mean postcopy code would have to deal with target page)
 * @ms: current migration state
static int postcopy_each_ram_send_discard(MigrationState *ms)
    struct RAMBlock *block;

    RAMBLOCK_FOREACH(block) {
        PostcopyDiscardState *pds =
            postcopy_discard_send_init(ms, block->idstr);

         * Postcopy sends chunks of bitmap over the wire, but it
         * just needs indexes at this point, avoids it having
         * target page specific code.
        ret = postcopy_send_discard_bm_ram(ms, pds, block);
        postcopy_discard_send_finish(ms, pds);
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 * Helper for postcopy_chunk_hostpages; it's called twice to
 * canonicalize the two bitmaps, that are similar, but one is
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix. This function canonicalizes the bitmaps.
 * @ms: current migration state
 * @unsent_pass: if true we need to canonicalize partially unsent host pages
 *               otherwise we need to canonicalize partially dirty host pages
 * @block: block that contains the page we want to canonicalize
 * @pds: state for postcopy
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          PostcopyDiscardState *pds)
    RAMState *rs = ram_state;
    unsigned long *bitmap = block->bmap;
    unsigned long *unsentmap = block->unsentmap;
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */

        /* Find a sent page */
        run_start = find_next_zero_bit(unsentmap, pages, 0);
        /* Find a dirty page */
        run_start = find_next_bit(bitmap, pages, 0);

    while (run_start < pages) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
        host_offset = run_start % host_ratio;
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* For the next pass */
            run_start = run_start + host_ratio;

            /* Find the end of this run */
            unsigned long run_end;
                run_end = find_next_bit(unsentmap, pages, run_start + 1);
                run_end = find_next_zero_bit(bitmap, pages, run_start + 1);

             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
            host_offset = run_end % host_ratio;
                fixup_start_addr = run_end - host_offset;
                 * This host page has gone, the next loop iteration starts
                 * from after the fixup
                run_start = fixup_start_addr + host_ratio;
                 * No discards on this iteration, next loop starts from
                 * next sent/dirty page
                run_start = run_end + 1;

            /* Tell the destination to discard this page */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /* For the unsent_pass we:
                 *     discard partially sent pages
                 * For the !unsent_pass (dirty) we:
                 *     discard partially dirty pages that were sent
                 *     (any partially sent pages were already discarded
                 *     by the previous unsent_pass)
                 */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* All pages in this host page are now not sent */
                set_bit(page, unsentmap);

                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);

            /* Find the next sent page for the next iteration */
            run_start = find_next_zero_bit(unsentmap, pages, run_start);
            /* Find the next dirty page for the next iteration */
            run_start = find_next_bit(bitmap, pages, run_start);
 * postcopy_chunk_hostpages: discard any partially sent host page
 * Utility for the outgoing postcopy code.
 * Discard any partially sent host-page size chunks, mark any partially
 * dirty host-page size chunks as all dirty. In this case the host-page
 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
 * Returns zero on success
 * @ms: current migration state
 * @block: block we want to work with
static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
    PostcopyDiscardState *pds =
        postcopy_discard_send_init(ms, block->idstr);

    /* First pass: Discard all partially sent host pages */
    postcopy_chunk_hostpages_pass(ms, true, block, pds);

     * Second pass: Ensure that all partially dirty host pages are made
    postcopy_chunk_hostpages_pass(ms, false, block, pds);

    postcopy_discard_send_finish(ms, pds);
 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
 * Returns zero on success
 * Transmit the set of pages to be discarded after precopy to the target
 * these are pages that:
 *     a) Have been previously transmitted but are now dirty again
 *     b) Pages that have never been transmitted, this ensures that
 *        any pages on the destination that have been mapped by background
 *        tasks get discarded (transparent huge pages is the specific concern)
 * Hopefully this is pretty sparse
 * @ms: current migration state
int ram_postcopy_send_discard_bitmap(MigrationState *ms)
    RAMState *rs = ram_state;

    /* This should be our last sync, the src is now paused */
    migration_bitmap_sync(rs);

    /* Easiest way to make sure we don't resume in the middle of a host-page */
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
        unsigned long *bitmap = block->bmap;
        unsigned long *unsentmap = block->unsentmap;

            /* We don't have a safe way to resize the sentmap, so
             * if the bitmap was resized it will be NULL at this
             */
            error_report("migration ram resized during precopy phase");

        /* Deal with TPS != HPS and huge pages */
        ret = postcopy_chunk_hostpages(ms, block);

         * Update the unsentmap to be unsentmap = unsentmap | dirty
        bitmap_or(unsentmap, unsentmap, bitmap, pages);
#ifdef DEBUG_POSTCOPY
        ram_debug_dump_bitmap(unsentmap, true, pages);

    trace_ram_postcopy_send_discard_bitmap();

    ret = postcopy_each_ram_send_discard(ms);
 * ram_discard_range: discard dirtied pages at the beginning of postcopy
 * Returns zero on success
 * @rbname: name of the RAMBlock of the request. NULL means the
 *          same as the last one.
 * @start: RAMBlock starting page
 * @length: RAMBlock size
int ram_discard_range(const char *rbname, uint64_t start, size_t length)
    trace_ram_discard_range(rbname, start, length);

    RAMBlock *rb = qemu_ram_block_by_name(rbname);
        error_report("ram_discard_range: Failed to find block '%s'", rbname);

    bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
                 length >> qemu_target_page_bits());
    ret = ram_block_discard_range(rb, start, length);
 * For every allocation, we will try not to crash the VM if the
 * allocation failed.
static int xbzrle_init(void)
    Error *local_err = NULL;

    if (!migrate_use_xbzrle()) {

    XBZRLE_cache_lock();

    XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.zero_target_page) {
        error_report("%s: Error allocating zero page", __func__);

    XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
                              TARGET_PAGE_SIZE, &local_err);
    if (!XBZRLE.cache) {
        error_report_err(local_err);
        goto free_zero_page;

    XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
    if (!XBZRLE.encoded_buf) {
        error_report("%s: Error allocating encoded_buf", __func__);

    XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
    if (!XBZRLE.current_buf) {
        error_report("%s: Error allocating current_buf", __func__);
        goto free_encoded_buf;

    /* We are all good */
    XBZRLE_cache_unlock();

    g_free(XBZRLE.encoded_buf);
    XBZRLE.encoded_buf = NULL;
    cache_fini(XBZRLE.cache);
    XBZRLE.cache = NULL;
    g_free(XBZRLE.zero_target_page);
    XBZRLE.zero_target_page = NULL;
    XBZRLE_cache_unlock();
static int ram_state_init(RAMState **rsp)
    *rsp = g_try_new0(RAMState, 1);
        error_report("%s: Init ramstate fail", __func__);

    qemu_mutex_init(&(*rsp)->bitmap_mutex);
    qemu_mutex_init(&(*rsp)->src_page_req_mutex);
    QSIMPLEQ_INIT(&(*rsp)->src_page_requests);

     * Count the total number of pages used by ram blocks not including any
     * gaps due to alignment or unplugs.
    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;

    ram_state_reset(*rsp);
static void ram_list_init_bitmaps(void)
    unsigned long pages;

    /* Skip setting bitmap if there is no RAM */
    if (ram_bytes_total()) {
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            pages = block->max_length >> TARGET_PAGE_BITS;
            block->bmap = bitmap_new(pages);
            bitmap_set(block->bmap, 0, pages);
            if (migrate_postcopy_ram()) {
                block->unsentmap = bitmap_new(pages);
                bitmap_set(block->unsentmap, 0, pages);
static void ram_init_bitmaps(RAMState *rs)
    /* For memory_global_dirty_log_start below. */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    ram_list_init_bitmaps();
    memory_global_dirty_log_start();
    migration_bitmap_sync(rs);

    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
static int ram_init_all(RAMState **rsp)
    if (ram_state_init(rsp)) {

    if (xbzrle_init()) {
        ram_state_cleanup(rsp);

    ram_init_bitmaps(*rsp);
 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
 * long-running RCU critical section.  When rcu-reclaims in the code
 * start to become numerous it will be necessary to reduce the
 * granularity of these critical sections.
 * ram_save_setup: Setup RAM for migration
 * Returns zero to indicate success and negative for error
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
static int ram_save_setup(QEMUFile *f, void *opaque)
    RAMState **rsp = opaque;

    if (compress_threads_save_setup()) {

    /* migration has already setup the bitmap, reuse it. */
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    RAMBLOCK_FOREACH(block) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->used_length);
        if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
            qemu_put_be64(f, block->page_size);

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
/**
 * ram_save_iterate: iterative stage for migration
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /* Avoid transferring ram during bulk phase of block migration as
         * the bulk phase will usually take a long time and transferring
         * ram updates during that time is pointless. */
        goto out;
    }

    rcu_read_lock();
    if (ram_list.version != rs->last_version) {
        ram_state_reset(rs);
    }

    /* Read version before ram_list.blocks */
    smp_rmb();

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int pages;

        pages = ram_find_and_save_block(rs, false);
        /* no more pages to send */
        if (pages == 0) {
            done = 1;
            break;
        }
        rs->iterations++;

        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_clock_get_ns() is a bit expensive, so we only check once
           every few iterations
        */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                trace_ram_save_iterate_big_wait(t1, i);
                break;
            }
        }
        i++;
    }
    flush_compressed_data(rs);
    rcu_read_unlock();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    ram_counters.transferred += 8;

    ret = qemu_file_get_error(f);
    if (ret < 0) {
        return ret;
    }

    return done;
}
/**
 * ram_save_complete: function called to send the remaining amount of ram
 *
 * Returns zero to indicate success
 *
 * Called with iothread lock
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;

    rcu_read_lock();

    if (!migration_in_postcopy()) {
        migration_bitmap_sync(rs);
    }

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(rs, !migration_in_colo_state());
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(rs);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *res_precopy_only,
                             uint64_t *res_compatible,
                             uint64_t *res_postcopy_only)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    if (migrate_postcopy_ram()) {
        /* We can do postcopy, and all the data is postcopiable */
        *res_compatible += remaining_size;
    } else {
        *res_precopy_only += remaining_size;
    }
}
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }

    loaded_data = XBZRLE.decoded_buf;
    /* load data and decode */
    /* it can change loaded_data to point to an internal buffer */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
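
/*
 * Wire format consumed above: one byte that must equal
 * ENCODING_FLAG_XBZRLE, a be16 length of the encoded data (at most
 * TARGET_PAGE_SIZE), then the XBZRLE-encoded bytes themselves, which are
 * applied as a delta on top of the current contents of @host.
 */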
/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    return block;
}
static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
/* return the size after decompression, or negative value on error */
static int
qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
                     const uint8_t *source, size_t source_len)
{
    int err;

    err = inflateReset(stream);
    if (err != Z_OK) {
        return -1;
    }

    stream->avail_in = source_len;
    stream->next_in = (uint8_t *)source;
    stream->avail_out = dest_len;
    stream->next_out = dest;

    err = inflate(stream, Z_NO_FLUSH);
    if (err != Z_STREAM_END) {
        return -1;
    }

    return stream->total_out;
}
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len, ret;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;

            ret = qemu_uncompress_data(&param->stream, des, pagesize,
                                       param->compbuf, len);
            if (ret < 0) {
                error_report("decompress data failed");
                qemu_file_set_error(decomp_file, ret);
            }

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
static int wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
    return qemu_file_get_error(decomp_file);
}
static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as an indicator which shows if the thread is
         * properly init'd or not
         */
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static int compress_threads_load_setup(QEMUFile *f)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    decomp_file = f;
    for (i = 0; i < thread_count; i++) {
        if (inflateInit(&decomp_param[i].stream) != Z_OK) {
            goto exit;
        }

        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;
exit:
    compress_threads_load_cleanup();
    return -1;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
/**
 * ram_load_setup: Setup RAM for migration incoming side
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to receive the data
 * @opaque: RAMState pointer
 */
static int ram_load_setup(QEMUFile *f, void *opaque)
{
    if (compress_threads_load_setup(f)) {
        return -1;
    }

    xbzrle_load_setup();
    ramblock_recv_map_init();

    return 0;
}

static int ram_load_cleanup(void *opaque)
{
    RAMBlock *rb;

    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

    RAMBLOCK_FOREACH(rb) {
        g_free(rb->receivedmap);
        rb->receivedmap = NULL;
    }

    return 0;
}
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was an error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram.  postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    unsigned long ram_pages = last_ram_page();

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to send the data
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);

        /*
         * If qemu file error, we should stop here, and then "addr"
         * may be invalid.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target-pages
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        /* Detect for any possible file errors */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block);
            }
        }
    }

    return ret;
}
static bool postcopy_is_advised(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}

static bool postcopy_is_running(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
}
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0, invalid_flags = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory must
     * be atomic
     */
    bool postcopy_running = postcopy_is_running();
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_is_advised();

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }
    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            ramblock_recv_bitmap_set(block, host);
            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    ret |= wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);

    return ret;
}
static bool ram_has_postcopy(void *opaque)
{
    return migrate_postcopy_ram();
}

static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
};

void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}