4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "qemu/osdep.h"
32 #include "qemu/cutils.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "qemu/main-loop.h"
38 #include "migration.h"
39 #include "migration/register.h"
40 #include "migration/misc.h"
41 #include "qemu-file.h"
42 #include "postcopy-ram.h"
43 #include "migration/page_cache.h"
44 #include "qemu/error-report.h"
45 #include "qapi/error.h"
46 #include "qapi/qapi-events-migration.h"
47 #include "qapi/qmp/qerror.h"
49 #include "exec/ram_addr.h"
50 #include "exec/target_page.h"
51 #include "qemu/rcu_queue.h"
52 #include "migration/colo.h"
53 #include "migration/block.h"
55 /***********************************************************/
56 /* ram save/restore */
58 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
59 * worked for pages that where filled with the same char. We switched
60 * it to only search for the zero value. And to avoid confusion with
61 * RAM_SSAVE_FLAG_COMPRESS_PAGE just rename it.
64 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
65 #define RAM_SAVE_FLAG_ZERO 0x02
66 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
67 #define RAM_SAVE_FLAG_PAGE 0x08
68 #define RAM_SAVE_FLAG_EOS 0x10
69 #define RAM_SAVE_FLAG_CONTINUE 0x20
70 #define RAM_SAVE_FLAG_XBZRLE 0x40
71 /* 0x80 is reserved in migration.h start with 0x100 next */
72 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
74 static inline bool is_zero_range(uint8_t *p
, uint64_t size
)
76 return buffer_is_zero(p
, size
);
79 XBZRLECacheStats xbzrle_counters
;
81 /* struct contains XBZRLE cache and a static page
82 used by the compression */
84 /* buffer used for XBZRLE encoding */
86 /* buffer for storing page content */
88 /* Cache for XBZRLE, Protected by lock. */
91 /* it will store a page full of zeros */
92 uint8_t *zero_target_page
;
93 /* buffer used for XBZRLE decoding */
97 static void XBZRLE_cache_lock(void)
99 if (migrate_use_xbzrle())
100 qemu_mutex_lock(&XBZRLE
.lock
);
103 static void XBZRLE_cache_unlock(void)
105 if (migrate_use_xbzrle())
106 qemu_mutex_unlock(&XBZRLE
.lock
);
110 * xbzrle_cache_resize: resize the xbzrle cache
112 * This function is called from qmp_migrate_set_cache_size in main
113 * thread, possibly while a migration is in progress. A running
114 * migration may be using the cache and might finish during this call,
115 * hence changes to the cache are protected by XBZRLE.lock().
117 * Returns 0 for success or -1 for error
119 * @new_size: new cache size
120 * @errp: set *errp if the check failed, with reason
122 int xbzrle_cache_resize(int64_t new_size
, Error
**errp
)
124 PageCache
*new_cache
;
127 /* Check for truncation */
128 if (new_size
!= (size_t)new_size
) {
129 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cache size",
130 "exceeding address space");
134 if (new_size
== migrate_xbzrle_cache_size()) {
141 if (XBZRLE
.cache
!= NULL
) {
142 new_cache
= cache_init(new_size
, TARGET_PAGE_SIZE
, errp
);
148 cache_fini(XBZRLE
.cache
);
149 XBZRLE
.cache
= new_cache
;
152 XBZRLE_cache_unlock();
156 static void ramblock_recv_map_init(void)
160 RAMBLOCK_FOREACH(rb
) {
161 assert(!rb
->receivedmap
);
162 rb
->receivedmap
= bitmap_new(rb
->max_length
>> qemu_target_page_bits());
166 int ramblock_recv_bitmap_test(RAMBlock
*rb
, void *host_addr
)
168 return test_bit(ramblock_recv_bitmap_offset(host_addr
, rb
),
172 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock
*rb
, uint64_t byte_offset
)
174 return test_bit(byte_offset
>> TARGET_PAGE_BITS
, rb
->receivedmap
);
177 void ramblock_recv_bitmap_set(RAMBlock
*rb
, void *host_addr
)
179 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr
, rb
), rb
->receivedmap
);
182 void ramblock_recv_bitmap_set_range(RAMBlock
*rb
, void *host_addr
,
185 bitmap_set_atomic(rb
->receivedmap
,
186 ramblock_recv_bitmap_offset(host_addr
, rb
),
191 * An outstanding page request, on the source, having been received
194 struct RAMSrcPageRequest
{
199 QSIMPLEQ_ENTRY(RAMSrcPageRequest
) next_req
;
202 /* State of RAM for migration */
204 /* QEMUFile used for this migration */
206 /* Last block that we have visited searching for dirty pages */
207 RAMBlock
*last_seen_block
;
208 /* Last block from where we have sent data */
209 RAMBlock
*last_sent_block
;
210 /* Last dirty target page we have sent */
211 ram_addr_t last_page
;
212 /* last ram version we have seen */
213 uint32_t last_version
;
214 /* We are in the first round */
216 /* How many times we have dirty too many pages */
217 int dirty_rate_high_cnt
;
218 /* these variables are used for bitmap sync */
219 /* last time we did a full bitmap_sync */
220 int64_t time_last_bitmap_sync
;
221 /* bytes transferred at start_time */
222 uint64_t bytes_xfer_prev
;
223 /* number of dirty pages since start_time */
224 uint64_t num_dirty_pages_period
;
225 /* xbzrle misses since the beginning of the period */
226 uint64_t xbzrle_cache_miss_prev
;
227 /* number of iterations at the beginning of period */
228 uint64_t iterations_prev
;
229 /* Iterations since start */
231 /* number of dirty bits in the bitmap */
232 uint64_t migration_dirty_pages
;
233 /* protects modification of the bitmap */
234 QemuMutex bitmap_mutex
;
235 /* The RAMBlock used in the last src_page_requests */
236 RAMBlock
*last_req_rb
;
237 /* Queue of outstanding page requests from the destination */
238 QemuMutex src_page_req_mutex
;
239 QSIMPLEQ_HEAD(src_page_requests
, RAMSrcPageRequest
) src_page_requests
;
241 typedef struct RAMState RAMState
;
243 static RAMState
*ram_state
;
245 uint64_t ram_bytes_remaining(void)
247 return ram_state
? (ram_state
->migration_dirty_pages
* TARGET_PAGE_SIZE
) :
251 MigrationStats ram_counters
;
253 /* used by the search for pages to send */
254 struct PageSearchStatus
{
255 /* Current block being searched */
257 /* Current page to search from */
259 /* Set once we wrap around */
262 typedef struct PageSearchStatus PageSearchStatus
;
264 struct CompressParam
{
273 typedef struct CompressParam CompressParam
;
275 struct DecompressParam
{
284 typedef struct DecompressParam DecompressParam
;
286 static CompressParam
*comp_param
;
287 static QemuThread
*compress_threads
;
288 /* comp_done_cond is used to wake up the migration thread when
289 * one of the compression threads has finished the compression.
290 * comp_done_lock is used to co-work with comp_done_cond.
292 static QemuMutex comp_done_lock
;
293 static QemuCond comp_done_cond
;
294 /* The empty QEMUFileOps will be used by file in CompressParam */
295 static const QEMUFileOps empty_ops
= { };
297 static DecompressParam
*decomp_param
;
298 static QemuThread
*decompress_threads
;
299 static QemuMutex decomp_done_lock
;
300 static QemuCond decomp_done_cond
;
302 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
305 static void *do_data_compress(void *opaque
)
307 CompressParam
*param
= opaque
;
311 qemu_mutex_lock(¶m
->mutex
);
312 while (!param
->quit
) {
314 block
= param
->block
;
315 offset
= param
->offset
;
317 qemu_mutex_unlock(¶m
->mutex
);
319 do_compress_ram_page(param
->file
, block
, offset
);
321 qemu_mutex_lock(&comp_done_lock
);
323 qemu_cond_signal(&comp_done_cond
);
324 qemu_mutex_unlock(&comp_done_lock
);
326 qemu_mutex_lock(¶m
->mutex
);
328 qemu_cond_wait(¶m
->cond
, ¶m
->mutex
);
331 qemu_mutex_unlock(¶m
->mutex
);
336 static inline void terminate_compression_threads(void)
338 int idx
, thread_count
;
340 thread_count
= migrate_compress_threads();
342 for (idx
= 0; idx
< thread_count
; idx
++) {
343 qemu_mutex_lock(&comp_param
[idx
].mutex
);
344 comp_param
[idx
].quit
= true;
345 qemu_cond_signal(&comp_param
[idx
].cond
);
346 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
350 static void compress_threads_save_cleanup(void)
354 if (!migrate_use_compression()) {
357 terminate_compression_threads();
358 thread_count
= migrate_compress_threads();
359 for (i
= 0; i
< thread_count
; i
++) {
360 qemu_thread_join(compress_threads
+ i
);
361 qemu_fclose(comp_param
[i
].file
);
362 qemu_mutex_destroy(&comp_param
[i
].mutex
);
363 qemu_cond_destroy(&comp_param
[i
].cond
);
365 qemu_mutex_destroy(&comp_done_lock
);
366 qemu_cond_destroy(&comp_done_cond
);
367 g_free(compress_threads
);
369 compress_threads
= NULL
;
373 static void compress_threads_save_setup(void)
377 if (!migrate_use_compression()) {
380 thread_count
= migrate_compress_threads();
381 compress_threads
= g_new0(QemuThread
, thread_count
);
382 comp_param
= g_new0(CompressParam
, thread_count
);
383 qemu_cond_init(&comp_done_cond
);
384 qemu_mutex_init(&comp_done_lock
);
385 for (i
= 0; i
< thread_count
; i
++) {
386 /* comp_param[i].file is just used as a dummy buffer to save data,
387 * set its ops to empty.
389 comp_param
[i
].file
= qemu_fopen_ops(NULL
, &empty_ops
);
390 comp_param
[i
].done
= true;
391 comp_param
[i
].quit
= false;
392 qemu_mutex_init(&comp_param
[i
].mutex
);
393 qemu_cond_init(&comp_param
[i
].cond
);
394 qemu_thread_create(compress_threads
+ i
, "compress",
395 do_data_compress
, comp_param
+ i
,
396 QEMU_THREAD_JOINABLE
);
402 struct MultiFDSendParams
{
410 typedef struct MultiFDSendParams MultiFDSendParams
;
413 MultiFDSendParams
*params
;
414 /* number of created threads */
416 } *multifd_send_state
;
418 static void terminate_multifd_send_threads(Error
*errp
)
422 for (i
= 0; i
< multifd_send_state
->count
; i
++) {
423 MultiFDSendParams
*p
= &multifd_send_state
->params
[i
];
425 qemu_mutex_lock(&p
->mutex
);
427 qemu_sem_post(&p
->sem
);
428 qemu_mutex_unlock(&p
->mutex
);
432 int multifd_save_cleanup(Error
**errp
)
437 if (!migrate_use_multifd()) {
440 terminate_multifd_send_threads(NULL
);
441 for (i
= 0; i
< multifd_send_state
->count
; i
++) {
442 MultiFDSendParams
*p
= &multifd_send_state
->params
[i
];
444 qemu_thread_join(&p
->thread
);
445 qemu_mutex_destroy(&p
->mutex
);
446 qemu_sem_destroy(&p
->sem
);
450 g_free(multifd_send_state
->params
);
451 multifd_send_state
->params
= NULL
;
452 g_free(multifd_send_state
);
453 multifd_send_state
= NULL
;
457 static void *multifd_send_thread(void *opaque
)
459 MultiFDSendParams
*p
= opaque
;
462 qemu_mutex_lock(&p
->mutex
);
464 qemu_mutex_unlock(&p
->mutex
);
467 qemu_mutex_unlock(&p
->mutex
);
468 qemu_sem_wait(&p
->sem
);
474 int multifd_save_setup(void)
479 if (!migrate_use_multifd()) {
482 thread_count
= migrate_multifd_channels();
483 multifd_send_state
= g_malloc0(sizeof(*multifd_send_state
));
484 multifd_send_state
->params
= g_new0(MultiFDSendParams
, thread_count
);
485 multifd_send_state
->count
= 0;
486 for (i
= 0; i
< thread_count
; i
++) {
487 MultiFDSendParams
*p
= &multifd_send_state
->params
[i
];
489 qemu_mutex_init(&p
->mutex
);
490 qemu_sem_init(&p
->sem
, 0);
493 p
->name
= g_strdup_printf("multifdsend_%d", i
);
494 qemu_thread_create(&p
->thread
, p
->name
, multifd_send_thread
, p
,
495 QEMU_THREAD_JOINABLE
);
497 multifd_send_state
->count
++;
502 struct MultiFDRecvParams
{
510 typedef struct MultiFDRecvParams MultiFDRecvParams
;
513 MultiFDRecvParams
*params
;
514 /* number of created threads */
516 } *multifd_recv_state
;
518 static void terminate_multifd_recv_threads(Error
*errp
)
522 for (i
= 0; i
< multifd_recv_state
->count
; i
++) {
523 MultiFDRecvParams
*p
= &multifd_recv_state
->params
[i
];
525 qemu_mutex_lock(&p
->mutex
);
527 qemu_sem_post(&p
->sem
);
528 qemu_mutex_unlock(&p
->mutex
);
532 int multifd_load_cleanup(Error
**errp
)
537 if (!migrate_use_multifd()) {
540 terminate_multifd_recv_threads(NULL
);
541 for (i
= 0; i
< multifd_recv_state
->count
; i
++) {
542 MultiFDRecvParams
*p
= &multifd_recv_state
->params
[i
];
544 qemu_thread_join(&p
->thread
);
545 qemu_mutex_destroy(&p
->mutex
);
546 qemu_sem_destroy(&p
->sem
);
550 g_free(multifd_recv_state
->params
);
551 multifd_recv_state
->params
= NULL
;
552 g_free(multifd_recv_state
);
553 multifd_recv_state
= NULL
;
558 static void *multifd_recv_thread(void *opaque
)
560 MultiFDRecvParams
*p
= opaque
;
563 qemu_mutex_lock(&p
->mutex
);
565 qemu_mutex_unlock(&p
->mutex
);
568 qemu_mutex_unlock(&p
->mutex
);
569 qemu_sem_wait(&p
->sem
);
575 int multifd_load_setup(void)
580 if (!migrate_use_multifd()) {
583 thread_count
= migrate_multifd_channels();
584 multifd_recv_state
= g_malloc0(sizeof(*multifd_recv_state
));
585 multifd_recv_state
->params
= g_new0(MultiFDRecvParams
, thread_count
);
586 multifd_recv_state
->count
= 0;
587 for (i
= 0; i
< thread_count
; i
++) {
588 MultiFDRecvParams
*p
= &multifd_recv_state
->params
[i
];
590 qemu_mutex_init(&p
->mutex
);
591 qemu_sem_init(&p
->sem
, 0);
594 p
->name
= g_strdup_printf("multifdrecv_%d", i
);
595 qemu_thread_create(&p
->thread
, p
->name
, multifd_recv_thread
, p
,
596 QEMU_THREAD_JOINABLE
);
597 multifd_recv_state
->count
++;
603 * save_page_header: write page header to wire
605 * If this is the 1st block, it also writes the block identification
607 * Returns the number of bytes written
609 * @f: QEMUFile where to send the data
610 * @block: block that contains the page we want to send
611 * @offset: offset inside the block for the page
612 * in the lower bits, it contains flags
614 static size_t save_page_header(RAMState
*rs
, QEMUFile
*f
, RAMBlock
*block
,
619 if (block
== rs
->last_sent_block
) {
620 offset
|= RAM_SAVE_FLAG_CONTINUE
;
622 qemu_put_be64(f
, offset
);
625 if (!(offset
& RAM_SAVE_FLAG_CONTINUE
)) {
626 len
= strlen(block
->idstr
);
627 qemu_put_byte(f
, len
);
628 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, len
);
630 rs
->last_sent_block
= block
;
636 * mig_throttle_guest_down: throotle down the guest
638 * Reduce amount of guest cpu execution to hopefully slow down memory
639 * writes. If guest dirty memory rate is reduced below the rate at
640 * which we can transfer pages to the destination then we should be
641 * able to complete migration. Some workloads dirty memory way too
642 * fast and will not effectively converge, even with auto-converge.
644 static void mig_throttle_guest_down(void)
646 MigrationState
*s
= migrate_get_current();
647 uint64_t pct_initial
= s
->parameters
.cpu_throttle_initial
;
648 uint64_t pct_icrement
= s
->parameters
.cpu_throttle_increment
;
650 /* We have not started throttling yet. Let's start it. */
651 if (!cpu_throttle_active()) {
652 cpu_throttle_set(pct_initial
);
654 /* Throttling already on, just increase the rate */
655 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement
);
660 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
662 * @rs: current RAM state
663 * @current_addr: address for the zero page
665 * Update the xbzrle cache to reflect a page that's been sent as all 0.
666 * The important thing is that a stale (not-yet-0'd) page be replaced
668 * As a bonus, if the page wasn't in the cache it gets added so that
669 * when a small write is made into the 0'd page it gets XBZRLE sent.
671 static void xbzrle_cache_zero_page(RAMState
*rs
, ram_addr_t current_addr
)
673 if (rs
->ram_bulk_stage
|| !migrate_use_xbzrle()) {
677 /* We don't care if this fails to allocate a new cache page
678 * as long as it updated an old one */
679 cache_insert(XBZRLE
.cache
, current_addr
, XBZRLE
.zero_target_page
,
680 ram_counters
.dirty_sync_count
);
683 #define ENCODING_FLAG_XBZRLE 0x1
686 * save_xbzrle_page: compress and send current page
688 * Returns: 1 means that we wrote the page
689 * 0 means that page is identical to the one already sent
690 * -1 means that xbzrle would be longer than normal
692 * @rs: current RAM state
693 * @current_data: pointer to the address of the page contents
694 * @current_addr: addr of the page
695 * @block: block that contains the page we want to send
696 * @offset: offset inside the block for the page
697 * @last_stage: if we are at the completion stage
699 static int save_xbzrle_page(RAMState
*rs
, uint8_t **current_data
,
700 ram_addr_t current_addr
, RAMBlock
*block
,
701 ram_addr_t offset
, bool last_stage
)
703 int encoded_len
= 0, bytes_xbzrle
;
704 uint8_t *prev_cached_page
;
706 if (!cache_is_cached(XBZRLE
.cache
, current_addr
,
707 ram_counters
.dirty_sync_count
)) {
708 xbzrle_counters
.cache_miss
++;
710 if (cache_insert(XBZRLE
.cache
, current_addr
, *current_data
,
711 ram_counters
.dirty_sync_count
) == -1) {
714 /* update *current_data when the page has been
715 inserted into cache */
716 *current_data
= get_cached_data(XBZRLE
.cache
, current_addr
);
722 prev_cached_page
= get_cached_data(XBZRLE
.cache
, current_addr
);
724 /* save current buffer into memory */
725 memcpy(XBZRLE
.current_buf
, *current_data
, TARGET_PAGE_SIZE
);
727 /* XBZRLE encoding (if there is no overflow) */
728 encoded_len
= xbzrle_encode_buffer(prev_cached_page
, XBZRLE
.current_buf
,
729 TARGET_PAGE_SIZE
, XBZRLE
.encoded_buf
,
731 if (encoded_len
== 0) {
732 trace_save_xbzrle_page_skipping();
734 } else if (encoded_len
== -1) {
735 trace_save_xbzrle_page_overflow();
736 xbzrle_counters
.overflow
++;
737 /* update data in the cache */
739 memcpy(prev_cached_page
, *current_data
, TARGET_PAGE_SIZE
);
740 *current_data
= prev_cached_page
;
745 /* we need to update the data in the cache, in order to get the same data */
747 memcpy(prev_cached_page
, XBZRLE
.current_buf
, TARGET_PAGE_SIZE
);
750 /* Send XBZRLE based compressed page */
751 bytes_xbzrle
= save_page_header(rs
, rs
->f
, block
,
752 offset
| RAM_SAVE_FLAG_XBZRLE
);
753 qemu_put_byte(rs
->f
, ENCODING_FLAG_XBZRLE
);
754 qemu_put_be16(rs
->f
, encoded_len
);
755 qemu_put_buffer(rs
->f
, XBZRLE
.encoded_buf
, encoded_len
);
756 bytes_xbzrle
+= encoded_len
+ 1 + 2;
757 xbzrle_counters
.pages
++;
758 xbzrle_counters
.bytes
+= bytes_xbzrle
;
759 ram_counters
.transferred
+= bytes_xbzrle
;
765 * migration_bitmap_find_dirty: find the next dirty page from start
767 * Called with rcu_read_lock() to protect migration_bitmap
769 * Returns the byte offset within memory region of the start of a dirty page
771 * @rs: current RAM state
772 * @rb: RAMBlock where to search for dirty pages
773 * @start: page where we start the search
776 unsigned long migration_bitmap_find_dirty(RAMState
*rs
, RAMBlock
*rb
,
779 unsigned long size
= rb
->used_length
>> TARGET_PAGE_BITS
;
780 unsigned long *bitmap
= rb
->bmap
;
783 if (rs
->ram_bulk_stage
&& start
> 0) {
786 next
= find_next_bit(bitmap
, size
, start
);
792 static inline bool migration_bitmap_clear_dirty(RAMState
*rs
,
798 ret
= test_and_clear_bit(page
, rb
->bmap
);
801 rs
->migration_dirty_pages
--;
806 static void migration_bitmap_sync_range(RAMState
*rs
, RAMBlock
*rb
,
807 ram_addr_t start
, ram_addr_t length
)
809 rs
->migration_dirty_pages
+=
810 cpu_physical_memory_sync_dirty_bitmap(rb
, start
, length
,
811 &rs
->num_dirty_pages_period
);
815 * ram_pagesize_summary: calculate all the pagesizes of a VM
817 * Returns a summary bitmap of the page sizes of all RAMBlocks
819 * For VMs with just normal pages this is equivalent to the host page
820 * size. If it's got some huge pages then it's the OR of all the
821 * different page sizes.
823 uint64_t ram_pagesize_summary(void)
826 uint64_t summary
= 0;
828 RAMBLOCK_FOREACH(block
) {
829 summary
|= block
->page_size
;
835 static void migration_bitmap_sync(RAMState
*rs
)
839 uint64_t bytes_xfer_now
;
841 ram_counters
.dirty_sync_count
++;
843 if (!rs
->time_last_bitmap_sync
) {
844 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
847 trace_migration_bitmap_sync_start();
848 memory_global_dirty_log_sync();
850 qemu_mutex_lock(&rs
->bitmap_mutex
);
852 RAMBLOCK_FOREACH(block
) {
853 migration_bitmap_sync_range(rs
, block
, 0, block
->used_length
);
856 qemu_mutex_unlock(&rs
->bitmap_mutex
);
858 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
860 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
862 /* more than 1 second = 1000 millisecons */
863 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
864 /* calculate period counters */
865 ram_counters
.dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
866 / (end_time
- rs
->time_last_bitmap_sync
);
867 bytes_xfer_now
= ram_counters
.transferred
;
869 /* During block migration the auto-converge logic incorrectly detects
870 * that ram migration makes no progress. Avoid this by disabling the
871 * throttling logic during the bulk phase of block migration. */
872 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
873 /* The following detection logic can be refined later. For now:
874 Check to see if the dirtied bytes is 50% more than the approx.
875 amount of bytes that just got transferred since the last time we
876 were in this routine. If that happens twice, start or increase
879 if ((rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
880 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
881 (++rs
->dirty_rate_high_cnt
>= 2)) {
882 trace_migration_throttle();
883 rs
->dirty_rate_high_cnt
= 0;
884 mig_throttle_guest_down();
888 if (migrate_use_xbzrle()) {
889 if (rs
->iterations_prev
!= rs
->iterations
) {
890 xbzrle_counters
.cache_miss_rate
=
891 (double)(xbzrle_counters
.cache_miss
-
892 rs
->xbzrle_cache_miss_prev
) /
893 (rs
->iterations
- rs
->iterations_prev
);
895 rs
->iterations_prev
= rs
->iterations
;
896 rs
->xbzrle_cache_miss_prev
= xbzrle_counters
.cache_miss
;
899 /* reset period counters */
900 rs
->time_last_bitmap_sync
= end_time
;
901 rs
->num_dirty_pages_period
= 0;
902 rs
->bytes_xfer_prev
= bytes_xfer_now
;
904 if (migrate_use_events()) {
905 qapi_event_send_migration_pass(ram_counters
.dirty_sync_count
, NULL
);
910 * save_zero_page: send the zero page to the stream
912 * Returns the number of pages written.
914 * @rs: current RAM state
915 * @block: block that contains the page we want to send
916 * @offset: offset inside the block for the page
918 static int save_zero_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
)
920 uint8_t *p
= block
->host
+ offset
;
923 if (is_zero_range(p
, TARGET_PAGE_SIZE
)) {
924 ram_counters
.duplicate
++;
925 ram_counters
.transferred
+=
926 save_page_header(rs
, rs
->f
, block
, offset
| RAM_SAVE_FLAG_ZERO
);
927 qemu_put_byte(rs
->f
, 0);
928 ram_counters
.transferred
+= 1;
935 static void ram_release_pages(const char *rbname
, uint64_t offset
, int pages
)
937 if (!migrate_release_ram() || !migration_in_postcopy()) {
941 ram_discard_range(rbname
, offset
, pages
<< TARGET_PAGE_BITS
);
945 * ram_save_page: send the given page to the stream
947 * Returns the number of pages written.
949 * >=0 - Number of pages written - this might legally be 0
950 * if xbzrle noticed the page was the same.
952 * @rs: current RAM state
953 * @block: block that contains the page we want to send
954 * @offset: offset inside the block for the page
955 * @last_stage: if we are at the completion stage
957 static int ram_save_page(RAMState
*rs
, PageSearchStatus
*pss
, bool last_stage
)
961 ram_addr_t current_addr
;
964 bool send_async
= true;
965 RAMBlock
*block
= pss
->block
;
966 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
968 p
= block
->host
+ offset
;
969 trace_ram_save_page(block
->idstr
, (uint64_t)offset
, p
);
971 /* In doubt sent page as normal */
973 ret
= ram_control_save_page(rs
->f
, block
->offset
,
974 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
976 ram_counters
.transferred
+= bytes_xmit
;
982 current_addr
= block
->offset
+ offset
;
984 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
985 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
986 if (bytes_xmit
> 0) {
987 ram_counters
.normal
++;
988 } else if (bytes_xmit
== 0) {
989 ram_counters
.duplicate
++;
993 pages
= save_zero_page(rs
, block
, offset
);
995 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
996 * page would be stale
998 xbzrle_cache_zero_page(rs
, current_addr
);
999 ram_release_pages(block
->idstr
, offset
, pages
);
1000 } else if (!rs
->ram_bulk_stage
&&
1001 !migration_in_postcopy() && migrate_use_xbzrle()) {
1002 pages
= save_xbzrle_page(rs
, &p
, current_addr
, block
,
1003 offset
, last_stage
);
1005 /* Can't send this cached data async, since the cache page
1006 * might get updated before it gets to the wire
1013 /* XBZRLE overflow or normal page */
1015 ram_counters
.transferred
+=
1016 save_page_header(rs
, rs
->f
, block
, offset
| RAM_SAVE_FLAG_PAGE
);
1018 qemu_put_buffer_async(rs
->f
, p
, TARGET_PAGE_SIZE
,
1019 migrate_release_ram() &
1020 migration_in_postcopy());
1022 qemu_put_buffer(rs
->f
, p
, TARGET_PAGE_SIZE
);
1024 ram_counters
.transferred
+= TARGET_PAGE_SIZE
;
1026 ram_counters
.normal
++;
1029 XBZRLE_cache_unlock();
1034 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
1037 RAMState
*rs
= ram_state
;
1038 int bytes_sent
, blen
;
1039 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
1041 bytes_sent
= save_page_header(rs
, f
, block
, offset
|
1042 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1043 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
1044 migrate_compress_level());
1047 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
1048 error_report("compressed data failed!");
1051 ram_release_pages(block
->idstr
, offset
& TARGET_PAGE_MASK
, 1);
1057 static void flush_compressed_data(RAMState
*rs
)
1059 int idx
, len
, thread_count
;
1061 if (!migrate_use_compression()) {
1064 thread_count
= migrate_compress_threads();
1066 qemu_mutex_lock(&comp_done_lock
);
1067 for (idx
= 0; idx
< thread_count
; idx
++) {
1068 while (!comp_param
[idx
].done
) {
1069 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
1072 qemu_mutex_unlock(&comp_done_lock
);
1074 for (idx
= 0; idx
< thread_count
; idx
++) {
1075 qemu_mutex_lock(&comp_param
[idx
].mutex
);
1076 if (!comp_param
[idx
].quit
) {
1077 len
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
1078 ram_counters
.transferred
+= len
;
1080 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
1084 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
1087 param
->block
= block
;
1088 param
->offset
= offset
;
1091 static int compress_page_with_multi_thread(RAMState
*rs
, RAMBlock
*block
,
1094 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
1096 thread_count
= migrate_compress_threads();
1097 qemu_mutex_lock(&comp_done_lock
);
1099 for (idx
= 0; idx
< thread_count
; idx
++) {
1100 if (comp_param
[idx
].done
) {
1101 comp_param
[idx
].done
= false;
1102 bytes_xmit
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
1103 qemu_mutex_lock(&comp_param
[idx
].mutex
);
1104 set_compress_params(&comp_param
[idx
], block
, offset
);
1105 qemu_cond_signal(&comp_param
[idx
].cond
);
1106 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
1108 ram_counters
.normal
++;
1109 ram_counters
.transferred
+= bytes_xmit
;
1116 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
1119 qemu_mutex_unlock(&comp_done_lock
);
1125 * ram_save_compressed_page: compress the given page and send it to the stream
1127 * Returns the number of pages written.
1129 * @rs: current RAM state
1130 * @block: block that contains the page we want to send
1131 * @offset: offset inside the block for the page
1132 * @last_stage: if we are at the completion stage
1134 static int ram_save_compressed_page(RAMState
*rs
, PageSearchStatus
*pss
,
1138 uint64_t bytes_xmit
= 0;
1141 RAMBlock
*block
= pss
->block
;
1142 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
1144 p
= block
->host
+ offset
;
1146 ret
= ram_control_save_page(rs
->f
, block
->offset
,
1147 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
1149 ram_counters
.transferred
+= bytes_xmit
;
1152 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
1153 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
1154 if (bytes_xmit
> 0) {
1155 ram_counters
.normal
++;
1156 } else if (bytes_xmit
== 0) {
1157 ram_counters
.duplicate
++;
1161 /* When starting the process of a new block, the first page of
1162 * the block should be sent out before other pages in the same
1163 * block, and all the pages in last block should have been sent
1164 * out, keeping this order is important, because the 'cont' flag
1165 * is used to avoid resending the block name.
1167 if (block
!= rs
->last_sent_block
) {
1168 flush_compressed_data(rs
);
1169 pages
= save_zero_page(rs
, block
, offset
);
1171 /* Make sure the first page is sent out before other pages */
1172 bytes_xmit
= save_page_header(rs
, rs
->f
, block
, offset
|
1173 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1174 blen
= qemu_put_compression_data(rs
->f
, p
, TARGET_PAGE_SIZE
,
1175 migrate_compress_level());
1177 ram_counters
.transferred
+= bytes_xmit
+ blen
;
1178 ram_counters
.normal
++;
1181 qemu_file_set_error(rs
->f
, blen
);
1182 error_report("compressed data failed!");
1186 ram_release_pages(block
->idstr
, offset
, pages
);
1189 pages
= save_zero_page(rs
, block
, offset
);
1191 pages
= compress_page_with_multi_thread(rs
, block
, offset
);
1193 ram_release_pages(block
->idstr
, offset
, pages
);
1202 * find_dirty_block: find the next dirty page and update any state
1203 * associated with the search process.
1205 * Returns if a page is found
1207 * @rs: current RAM state
1208 * @pss: data about the state of the current dirty page scan
1209 * @again: set to false if the search has scanned the whole of RAM
1211 static bool find_dirty_block(RAMState
*rs
, PageSearchStatus
*pss
, bool *again
)
1213 pss
->page
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->page
);
1214 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1215 pss
->page
>= rs
->last_page
) {
1217 * We've been once around the RAM and haven't found anything.
1223 if ((pss
->page
<< TARGET_PAGE_BITS
) >= pss
->block
->used_length
) {
1224 /* Didn't find anything in this RAM Block */
1226 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1228 /* Hit the end of the list */
1229 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1230 /* Flag that we've looped */
1231 pss
->complete_round
= true;
1232 rs
->ram_bulk_stage
= false;
1233 if (migrate_use_xbzrle()) {
1234 /* If xbzrle is on, stop using the data compression at this
1235 * point. In theory, xbzrle can do better than compression.
1237 flush_compressed_data(rs
);
1240 /* Didn't find anything this time, but try again on the new block */
1244 /* Can go around again, but... */
1246 /* We've found something so probably don't need to */
1252 * unqueue_page: gets a page of the queue
1254 * Helper for 'get_queued_page' - gets a page off the queue
1256 * Returns the block of the page (or NULL if none available)
1258 * @rs: current RAM state
1259 * @offset: used to return the offset within the RAMBlock
1261 static RAMBlock
*unqueue_page(RAMState
*rs
, ram_addr_t
*offset
)
1263 RAMBlock
*block
= NULL
;
1265 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1266 if (!QSIMPLEQ_EMPTY(&rs
->src_page_requests
)) {
1267 struct RAMSrcPageRequest
*entry
=
1268 QSIMPLEQ_FIRST(&rs
->src_page_requests
);
1270 *offset
= entry
->offset
;
1272 if (entry
->len
> TARGET_PAGE_SIZE
) {
1273 entry
->len
-= TARGET_PAGE_SIZE
;
1274 entry
->offset
+= TARGET_PAGE_SIZE
;
1276 memory_region_unref(block
->mr
);
1277 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1281 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1287 * get_queued_page: unqueue a page from the postocpy requests
1289 * Skips pages that are already sent (!dirty)
1291 * Returns if a queued page is found
1293 * @rs: current RAM state
1294 * @pss: data about the state of the current dirty page scan
1296 static bool get_queued_page(RAMState
*rs
, PageSearchStatus
*pss
)
1303 block
= unqueue_page(rs
, &offset
);
1305 * We're sending this page, and since it's postcopy nothing else
1306 * will dirty it, and we must make sure it doesn't get sent again
1307 * even if this queue request was received after the background
1308 * search already sent it.
1313 page
= offset
>> TARGET_PAGE_BITS
;
1314 dirty
= test_bit(page
, block
->bmap
);
1316 trace_get_queued_page_not_dirty(block
->idstr
, (uint64_t)offset
,
1317 page
, test_bit(page
, block
->unsentmap
));
1319 trace_get_queued_page(block
->idstr
, (uint64_t)offset
, page
);
1323 } while (block
&& !dirty
);
1327 * As soon as we start servicing pages out of order, then we have
1328 * to kill the bulk stage, since the bulk stage assumes
1329 * in (migration_bitmap_find_and_reset_dirty) that every page is
1330 * dirty, that's no longer true.
1332 rs
->ram_bulk_stage
= false;
1335 * We want the background search to continue from the queued page
1336 * since the guest is likely to want other pages near to the page
1337 * it just requested.
1340 pss
->page
= offset
>> TARGET_PAGE_BITS
;
1347 * migration_page_queue_free: drop any remaining pages in the ram
1350 * It should be empty at the end anyway, but in error cases there may
1351 * be some left. in case that there is any page left, we drop it.
1354 static void migration_page_queue_free(RAMState
*rs
)
1356 struct RAMSrcPageRequest
*mspr
, *next_mspr
;
1357 /* This queue generally should be empty - but in the case of a failed
1358 * migration might have some droppings in.
1361 QSIMPLEQ_FOREACH_SAFE(mspr
, &rs
->src_page_requests
, next_req
, next_mspr
) {
1362 memory_region_unref(mspr
->rb
->mr
);
1363 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1370 * ram_save_queue_pages: queue the page for transmission
1372 * A request from postcopy destination for example.
1374 * Returns zero on success or negative on error
1376 * @rbname: Name of the RAMBLock of the request. NULL means the
1377 * same that last one.
1378 * @start: starting address from the start of the RAMBlock
1379 * @len: length (in bytes) to send
1381 int ram_save_queue_pages(const char *rbname
, ram_addr_t start
, ram_addr_t len
)
1384 RAMState
*rs
= ram_state
;
1386 ram_counters
.postcopy_requests
++;
1389 /* Reuse last RAMBlock */
1390 ramblock
= rs
->last_req_rb
;
1394 * Shouldn't happen, we can't reuse the last RAMBlock if
1395 * it's the 1st request.
1397 error_report("ram_save_queue_pages no previous block");
1401 ramblock
= qemu_ram_block_by_name(rbname
);
1404 /* We shouldn't be asked for a non-existent RAMBlock */
1405 error_report("ram_save_queue_pages no block '%s'", rbname
);
1408 rs
->last_req_rb
= ramblock
;
1410 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1411 if (start
+len
> ramblock
->used_length
) {
1412 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1413 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1414 __func__
, start
, len
, ramblock
->used_length
);
1418 struct RAMSrcPageRequest
*new_entry
=
1419 g_malloc0(sizeof(struct RAMSrcPageRequest
));
1420 new_entry
->rb
= ramblock
;
1421 new_entry
->offset
= start
;
1422 new_entry
->len
= len
;
1424 memory_region_ref(ramblock
->mr
);
1425 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1426 QSIMPLEQ_INSERT_TAIL(&rs
->src_page_requests
, new_entry
, next_req
);
1427 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1438 * ram_save_target_page: save one target page
1440 * Returns the number of pages written
1442 * @rs: current RAM state
1443 * @ms: current migration state
1444 * @pss: data about the page we want to send
1445 * @last_stage: if we are at the completion stage
1447 static int ram_save_target_page(RAMState
*rs
, PageSearchStatus
*pss
,
1452 /* Check the pages is dirty and if it is send it */
1453 if (migration_bitmap_clear_dirty(rs
, pss
->block
, pss
->page
)) {
1455 * If xbzrle is on, stop using the data compression after first
1456 * round of migration even if compression is enabled. In theory,
1457 * xbzrle can do better than compression.
1459 if (migrate_use_compression() &&
1460 (rs
->ram_bulk_stage
|| !migrate_use_xbzrle())) {
1461 res
= ram_save_compressed_page(rs
, pss
, last_stage
);
1463 res
= ram_save_page(rs
, pss
, last_stage
);
1469 if (pss
->block
->unsentmap
) {
1470 clear_bit(pss
->page
, pss
->block
->unsentmap
);
1478 * ram_save_host_page: save a whole host page
1480 * Starting at *offset send pages up to the end of the current host
1481 * page. It's valid for the initial offset to point into the middle of
1482 * a host page in which case the remainder of the hostpage is sent.
1483 * Only dirty target pages are sent. Note that the host page size may
1484 * be a huge page for this block.
1485 * The saving stops at the boundary of the used_length of the block
1486 * if the RAMBlock isn't a multiple of the host page size.
1488 * Returns the number of pages written or negative on error
1490 * @rs: current RAM state
1491 * @ms: current migration state
1492 * @pss: data about the page we want to send
1493 * @last_stage: if we are at the completion stage
1495 static int ram_save_host_page(RAMState
*rs
, PageSearchStatus
*pss
,
1498 int tmppages
, pages
= 0;
1499 size_t pagesize_bits
=
1500 qemu_ram_pagesize(pss
->block
) >> TARGET_PAGE_BITS
;
1503 tmppages
= ram_save_target_page(rs
, pss
, last_stage
);
1510 } while ((pss
->page
& (pagesize_bits
- 1)) &&
1511 offset_in_ramblock(pss
->block
, pss
->page
<< TARGET_PAGE_BITS
));
1513 /* The offset we leave with is the last one we looked at */
1519 * ram_find_and_save_block: finds a dirty page and sends it to f
1521 * Called within an RCU critical section.
1523 * Returns the number of pages written where zero means no dirty pages
1525 * @rs: current RAM state
1526 * @last_stage: if we are at the completion stage
1528 * On systems where host-page-size > target-page-size it will send all the
1529 * pages in a host page that are dirty.
1532 static int ram_find_and_save_block(RAMState
*rs
, bool last_stage
)
1534 PageSearchStatus pss
;
1538 /* No dirty page as there is zero RAM */
1539 if (!ram_bytes_total()) {
1543 pss
.block
= rs
->last_seen_block
;
1544 pss
.page
= rs
->last_page
;
1545 pss
.complete_round
= false;
1548 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1553 found
= get_queued_page(rs
, &pss
);
1556 /* priority queue empty, so just search for something dirty */
1557 found
= find_dirty_block(rs
, &pss
, &again
);
1561 pages
= ram_save_host_page(rs
, &pss
, last_stage
);
1563 } while (!pages
&& again
);
1565 rs
->last_seen_block
= pss
.block
;
1566 rs
->last_page
= pss
.page
;
1571 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1573 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1576 ram_counters
.duplicate
+= pages
;
1578 ram_counters
.normal
+= pages
;
1579 ram_counters
.transferred
+= size
;
1580 qemu_update_position(f
, size
);
1584 uint64_t ram_bytes_total(void)
1590 RAMBLOCK_FOREACH(block
) {
1591 total
+= block
->used_length
;
1597 static void xbzrle_load_setup(void)
1599 XBZRLE
.decoded_buf
= g_malloc(TARGET_PAGE_SIZE
);
1602 static void xbzrle_load_cleanup(void)
1604 g_free(XBZRLE
.decoded_buf
);
1605 XBZRLE
.decoded_buf
= NULL
;
1608 static void ram_state_cleanup(RAMState
**rsp
)
1611 migration_page_queue_free(*rsp
);
1612 qemu_mutex_destroy(&(*rsp
)->bitmap_mutex
);
1613 qemu_mutex_destroy(&(*rsp
)->src_page_req_mutex
);
1619 static void xbzrle_cleanup(void)
1621 XBZRLE_cache_lock();
1623 cache_fini(XBZRLE
.cache
);
1624 g_free(XBZRLE
.encoded_buf
);
1625 g_free(XBZRLE
.current_buf
);
1626 g_free(XBZRLE
.zero_target_page
);
1627 XBZRLE
.cache
= NULL
;
1628 XBZRLE
.encoded_buf
= NULL
;
1629 XBZRLE
.current_buf
= NULL
;
1630 XBZRLE
.zero_target_page
= NULL
;
1632 XBZRLE_cache_unlock();
1635 static void ram_save_cleanup(void *opaque
)
1637 RAMState
**rsp
= opaque
;
1640 /* caller have hold iothread lock or is in a bh, so there is
1641 * no writing race against this migration_bitmap
1643 memory_global_dirty_log_stop();
1645 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1646 g_free(block
->bmap
);
1648 g_free(block
->unsentmap
);
1649 block
->unsentmap
= NULL
;
1653 compress_threads_save_cleanup();
1654 ram_state_cleanup(rsp
);
1657 static void ram_state_reset(RAMState
*rs
)
1659 rs
->last_seen_block
= NULL
;
1660 rs
->last_sent_block
= NULL
;
1662 rs
->last_version
= ram_list
.version
;
1663 rs
->ram_bulk_stage
= true;
1666 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1669 * 'expected' is the value you expect the bitmap mostly to be full
1670 * of; it won't bother printing lines that are all this value.
1671 * If 'todump' is null the migration bitmap is dumped.
1673 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
,
1674 unsigned long pages
)
1677 int64_t linelen
= 128;
1680 for (cur
= 0; cur
< pages
; cur
+= linelen
) {
1684 * Last line; catch the case where the line length
1685 * is longer than remaining ram
1687 if (cur
+ linelen
> pages
) {
1688 linelen
= pages
- cur
;
1690 for (curb
= 0; curb
< linelen
; curb
++) {
1691 bool thisbit
= test_bit(cur
+ curb
, todump
);
1692 linebuf
[curb
] = thisbit
? '1' : '.';
1693 found
= found
|| (thisbit
!= expected
);
1696 linebuf
[curb
] = '\0';
1697 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
1702 /* **** functions for postcopy ***** */
1704 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1706 struct RAMBlock
*block
;
1708 RAMBLOCK_FOREACH(block
) {
1709 unsigned long *bitmap
= block
->bmap
;
1710 unsigned long range
= block
->used_length
>> TARGET_PAGE_BITS
;
1711 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, 0);
1713 while (run_start
< range
) {
1714 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1715 ram_discard_range(block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1716 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1717 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1723 * postcopy_send_discard_bm_ram: discard a RAMBlock
1725 * Returns zero on success
1727 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1728 * Note: At this point the 'unsentmap' is the processed bitmap combined
1729 * with the dirtymap; so a '1' means it's either dirty or unsent.
1731 * @ms: current migration state
1732 * @pds: state for postcopy
1733 * @start: RAMBlock starting page
1734 * @length: RAMBlock size
1736 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1737 PostcopyDiscardState
*pds
,
1740 unsigned long end
= block
->used_length
>> TARGET_PAGE_BITS
;
1741 unsigned long current
;
1742 unsigned long *unsentmap
= block
->unsentmap
;
1744 for (current
= 0; current
< end
; ) {
1745 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1748 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1749 unsigned long discard_length
;
1752 discard_length
= end
- one
;
1754 discard_length
= zero
- one
;
1756 if (discard_length
) {
1757 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1759 current
= one
+ discard_length
;
1769 * postcopy_each_ram_send_discard: discard all RAMBlocks
1771 * Returns 0 for success or negative for error
1773 * Utility for the outgoing postcopy code.
1774 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1775 * passing it bitmap indexes and name.
1776 * (qemu_ram_foreach_block ends up passing unscaled lengths
1777 * which would mean postcopy code would have to deal with target page)
1779 * @ms: current migration state
1781 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1783 struct RAMBlock
*block
;
1786 RAMBLOCK_FOREACH(block
) {
1787 PostcopyDiscardState
*pds
=
1788 postcopy_discard_send_init(ms
, block
->idstr
);
1791 * Postcopy sends chunks of bitmap over the wire, but it
1792 * just needs indexes at this point, avoids it having
1793 * target page specific code.
1795 ret
= postcopy_send_discard_bm_ram(ms
, pds
, block
);
1796 postcopy_discard_send_finish(ms
, pds
);
1806 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
1808 * Helper for postcopy_chunk_hostpages; it's called twice to
1809 * canonicalize the two bitmaps, that are similar, but one is
1812 * Postcopy requires that all target pages in a hostpage are dirty or
1813 * clean, not a mix. This function canonicalizes the bitmaps.
1815 * @ms: current migration state
1816 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1817 * otherwise we need to canonicalize partially dirty host pages
1818 * @block: block that contains the page we want to canonicalize
1819 * @pds: state for postcopy
1821 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1823 PostcopyDiscardState
*pds
)
1825 RAMState
*rs
= ram_state
;
1826 unsigned long *bitmap
= block
->bmap
;
1827 unsigned long *unsentmap
= block
->unsentmap
;
1828 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1829 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1830 unsigned long run_start
;
1832 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1833 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1838 /* Find a sent page */
1839 run_start
= find_next_zero_bit(unsentmap
, pages
, 0);
1841 /* Find a dirty page */
1842 run_start
= find_next_bit(bitmap
, pages
, 0);
1845 while (run_start
< pages
) {
1846 bool do_fixup
= false;
1847 unsigned long fixup_start_addr
;
1848 unsigned long host_offset
;
1851 * If the start of this run of pages is in the middle of a host
1852 * page, then we need to fixup this host page.
1854 host_offset
= run_start
% host_ratio
;
1857 run_start
-= host_offset
;
1858 fixup_start_addr
= run_start
;
1859 /* For the next pass */
1860 run_start
= run_start
+ host_ratio
;
1862 /* Find the end of this run */
1863 unsigned long run_end
;
1865 run_end
= find_next_bit(unsentmap
, pages
, run_start
+ 1);
1867 run_end
= find_next_zero_bit(bitmap
, pages
, run_start
+ 1);
1870 * If the end isn't at the start of a host page, then the
1871 * run doesn't finish at the end of a host page
1872 * and we need to discard.
1874 host_offset
= run_end
% host_ratio
;
1877 fixup_start_addr
= run_end
- host_offset
;
1879 * This host page has gone, the next loop iteration starts
1880 * from after the fixup
1882 run_start
= fixup_start_addr
+ host_ratio
;
1885 * No discards on this iteration, next loop starts from
1886 * next sent/dirty page
1888 run_start
= run_end
+ 1;
1895 /* Tell the destination to discard this page */
1896 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1897 /* For the unsent_pass we:
1898 * discard partially sent pages
1899 * For the !unsent_pass (dirty) we:
1900 * discard partially dirty pages that were sent
1901 * (any partially sent pages were already discarded
1902 * by the previous unsent_pass)
1904 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1908 /* Clean up the bitmap */
1909 for (page
= fixup_start_addr
;
1910 page
< fixup_start_addr
+ host_ratio
; page
++) {
1911 /* All pages in this host page are now not sent */
1912 set_bit(page
, unsentmap
);
1915 * Remark them as dirty, updating the count for any pages
1916 * that weren't previously dirty.
1918 rs
->migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1923 /* Find the next sent page for the next iteration */
1924 run_start
= find_next_zero_bit(unsentmap
, pages
, run_start
);
1926 /* Find the next dirty page for the next iteration */
1927 run_start
= find_next_bit(bitmap
, pages
, run_start
);
1933 * postcopy_chuck_hostpages: discrad any partially sent host page
1935 * Utility for the outgoing postcopy code.
1937 * Discard any partially sent host-page size chunks, mark any partially
1938 * dirty host-page size chunks as all dirty. In this case the host-page
1939 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1941 * Returns zero on success
1943 * @ms: current migration state
1944 * @block: block we want to work with
1946 static int postcopy_chunk_hostpages(MigrationState
*ms
, RAMBlock
*block
)
1948 PostcopyDiscardState
*pds
=
1949 postcopy_discard_send_init(ms
, block
->idstr
);
1951 /* First pass: Discard all partially sent host pages */
1952 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1954 * Second pass: Ensure that all partially dirty host pages are made
1957 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1959 postcopy_discard_send_finish(ms
, pds
);
1964 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1966 * Returns zero on success
1968 * Transmit the set of pages to be discarded after precopy to the target
1969 * these are pages that:
1970 * a) Have been previously transmitted but are now dirty again
1971 * b) Pages that have never been transmitted, this ensures that
1972 * any pages on the destination that have been mapped by background
1973 * tasks get discarded (transparent huge pages is the specific concern)
1974 * Hopefully this is pretty sparse
1976 * @ms: current migration state
1978 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1980 RAMState
*rs
= ram_state
;
1986 /* This should be our last sync, the src is now paused */
1987 migration_bitmap_sync(rs
);
1989 /* Easiest way to make sure we don't resume in the middle of a host-page */
1990 rs
->last_seen_block
= NULL
;
1991 rs
->last_sent_block
= NULL
;
1994 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1995 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1996 unsigned long *bitmap
= block
->bmap
;
1997 unsigned long *unsentmap
= block
->unsentmap
;
2000 /* We don't have a safe way to resize the sentmap, so
2001 * if the bitmap was resized it will be NULL at this
2004 error_report("migration ram resized during precopy phase");
2008 /* Deal with TPS != HPS and huge pages */
2009 ret
= postcopy_chunk_hostpages(ms
, block
);
2016 * Update the unsentmap to be unsentmap = unsentmap | dirty
2018 bitmap_or(unsentmap
, unsentmap
, bitmap
, pages
);
2019 #ifdef DEBUG_POSTCOPY
2020 ram_debug_dump_bitmap(unsentmap
, true, pages
);
2023 trace_ram_postcopy_send_discard_bitmap();
2025 ret
= postcopy_each_ram_send_discard(ms
);
2032 * ram_discard_range: discard dirtied pages at the beginning of postcopy
2034 * Returns zero on success
2036 * @rbname: name of the RAMBlock of the request. NULL means the
2037 * same that last one.
2038 * @start: RAMBlock starting page
2039 * @length: RAMBlock size
2041 int ram_discard_range(const char *rbname
, uint64_t start
, size_t length
)
2045 trace_ram_discard_range(rbname
, start
, length
);
2048 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
2051 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
2055 bitmap_clear(rb
->receivedmap
, start
>> qemu_target_page_bits(),
2056 length
>> qemu_target_page_bits());
2057 ret
= ram_block_discard_range(rb
, start
, length
);
2066 * For every allocation, we will try not to crash the VM if the
2067 * allocation failed.
2069 static int xbzrle_init(void)
2071 Error
*local_err
= NULL
;
2073 if (!migrate_use_xbzrle()) {
2077 XBZRLE_cache_lock();
2079 XBZRLE
.zero_target_page
= g_try_malloc0(TARGET_PAGE_SIZE
);
2080 if (!XBZRLE
.zero_target_page
) {
2081 error_report("%s: Error allocating zero page", __func__
);
2085 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size(),
2086 TARGET_PAGE_SIZE
, &local_err
);
2087 if (!XBZRLE
.cache
) {
2088 error_report_err(local_err
);
2089 goto free_zero_page
;
2092 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
2093 if (!XBZRLE
.encoded_buf
) {
2094 error_report("%s: Error allocating encoded_buf", __func__
);
2098 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
2099 if (!XBZRLE
.current_buf
) {
2100 error_report("%s: Error allocating current_buf", __func__
);
2101 goto free_encoded_buf
;
2104 /* We are all good */
2105 XBZRLE_cache_unlock();
2109 g_free(XBZRLE
.encoded_buf
);
2110 XBZRLE
.encoded_buf
= NULL
;
2112 cache_fini(XBZRLE
.cache
);
2113 XBZRLE
.cache
= NULL
;
2115 g_free(XBZRLE
.zero_target_page
);
2116 XBZRLE
.zero_target_page
= NULL
;
2118 XBZRLE_cache_unlock();
2122 static int ram_state_init(RAMState
**rsp
)
2124 *rsp
= g_try_new0(RAMState
, 1);
2127 error_report("%s: Init ramstate fail", __func__
);
2131 qemu_mutex_init(&(*rsp
)->bitmap_mutex
);
2132 qemu_mutex_init(&(*rsp
)->src_page_req_mutex
);
2133 QSIMPLEQ_INIT(&(*rsp
)->src_page_requests
);
2136 * Count the total number of pages used by ram blocks not including any
2137 * gaps due to alignment or unplugs.
2139 (*rsp
)->migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
2141 ram_state_reset(*rsp
);
2146 static void ram_list_init_bitmaps(void)
2149 unsigned long pages
;
2151 /* Skip setting bitmap if there is no RAM */
2152 if (ram_bytes_total()) {
2153 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
2154 pages
= block
->max_length
>> TARGET_PAGE_BITS
;
2155 block
->bmap
= bitmap_new(pages
);
2156 bitmap_set(block
->bmap
, 0, pages
);
2157 if (migrate_postcopy_ram()) {
2158 block
->unsentmap
= bitmap_new(pages
);
2159 bitmap_set(block
->unsentmap
, 0, pages
);
2165 static void ram_init_bitmaps(RAMState
*rs
)
2167 /* For memory_global_dirty_log_start below. */
2168 qemu_mutex_lock_iothread();
2169 qemu_mutex_lock_ramlist();
2172 ram_list_init_bitmaps();
2173 memory_global_dirty_log_start();
2174 migration_bitmap_sync(rs
);
2177 qemu_mutex_unlock_ramlist();
2178 qemu_mutex_unlock_iothread();
2181 static int ram_init_all(RAMState
**rsp
)
2183 if (ram_state_init(rsp
)) {
2187 if (xbzrle_init()) {
2188 ram_state_cleanup(rsp
);
2192 ram_init_bitmaps(*rsp
);
2198 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2199 * long-running RCU critical section. When rcu-reclaims in the code
2200 * start to become numerous it will be necessary to reduce the
2201 * granularity of these critical sections.
2205 * ram_save_setup: Setup RAM for migration
2207 * Returns zero to indicate success and negative for error
2209 * @f: QEMUFile where to send the data
2210 * @opaque: RAMState pointer
2212 static int ram_save_setup(QEMUFile
*f
, void *opaque
)
2214 RAMState
**rsp
= opaque
;
2217 /* migration has already setup the bitmap, reuse it. */
2218 if (!migration_in_colo_state()) {
2219 if (ram_init_all(rsp
) != 0) {
2227 qemu_put_be64(f
, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
);
2229 RAMBLOCK_FOREACH(block
) {
2230 qemu_put_byte(f
, strlen(block
->idstr
));
2231 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, strlen(block
->idstr
));
2232 qemu_put_be64(f
, block
->used_length
);
2233 if (migrate_postcopy_ram() && block
->page_size
!= qemu_host_page_size
) {
2234 qemu_put_be64(f
, block
->page_size
);
2239 compress_threads_save_setup();
2241 ram_control_before_iterate(f
, RAM_CONTROL_SETUP
);
2242 ram_control_after_iterate(f
, RAM_CONTROL_SETUP
);
2244 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2250 * ram_save_iterate: iterative stage for migration
2252 * Returns zero to indicate success and negative for error
2254 * @f: QEMUFile where to send the data
2255 * @opaque: RAMState pointer
2257 static int ram_save_iterate(QEMUFile
*f
, void *opaque
)
2259 RAMState
**temp
= opaque
;
2260 RAMState
*rs
= *temp
;
2266 if (blk_mig_bulk_active()) {
2267 /* Avoid transferring ram during bulk phase of block migration as
2268 * the bulk phase will usually take a long time and transferring
2269 * ram updates during that time is pointless. */
2274 if (ram_list
.version
!= rs
->last_version
) {
2275 ram_state_reset(rs
);
2278 /* Read version before ram_list.blocks */
2281 ram_control_before_iterate(f
, RAM_CONTROL_ROUND
);
2283 t0
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2285 while ((ret
= qemu_file_rate_limit(f
)) == 0) {
2288 pages
= ram_find_and_save_block(rs
, false);
2289 /* no more pages to sent */
2296 /* we want to check in the 1st loop, just in case it was the 1st time
2297 and we had to sync the dirty bitmap.
2298 qemu_get_clock_ns() is a bit expensive, so we only check each some
2301 if ((i
& 63) == 0) {
2302 uint64_t t1
= (qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - t0
) / 1000000;
2303 if (t1
> MAX_WAIT
) {
2304 trace_ram_save_iterate_big_wait(t1
, i
);
2310 flush_compressed_data(rs
);
2314 * Must occur before EOS (or any QEMUFile operation)
2315 * because of RDMA protocol.
2317 ram_control_after_iterate(f
, RAM_CONTROL_ROUND
);
2320 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2321 ram_counters
.transferred
+= 8;
2323 ret
= qemu_file_get_error(f
);
/**
 * ram_save_complete: function called to send the remaining amount of RAM
 *
 * Returns zero to indicate success
 *
 * Called with the iothread lock held
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;

    rcu_read_lock();

    if (!migration_in_postcopy()) {
        migration_bitmap_sync(rs);
    }

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(rs, !migration_in_colo_state());
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(rs);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *res_precopy_only,
                             uint64_t *res_compatible,
                             uint64_t *res_postcopy_only)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    uint64_t remaining_size;

    remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy() &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
    }

    if (migrate_postcopy_ram()) {
        /* We can do postcopy, and all the data is postcopiable */
        *res_compatible += remaining_size;
    } else {
        *res_precopy_only += remaining_size;
    }
}
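/*
 * ram_save_pending() reports its estimate in one of two buckets: with
 * postcopy-ram enabled all remaining pages can be sent either before or
 * after the switchover, so they are accounted in *res_compatible;
 * otherwise they are precopy-only.  The bitmap resync above only happens
 * once the estimate drops below max_size, presumably to avoid paying for
 * a full sync on every call.
 */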
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }
    loaded_data = XBZRLE.decoded_buf;
    /* load data and decode */
    /* it can change loaded_data to point to an internal buffer */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
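/*
 * On the wire, an XBZRLE page (as consumed by load_xbzrle() above) is:
 *
 *   byte:  ENCODING_FLAG_XBZRLE
 *   be16:  length of the encoded data
 *   bytes: XBZRLE-encoded delta against the cached copy of the page
 *
 * This mirrors the qemu_get_* calls above; the payload is produced by the
 * sender's XBZRLE encoder against its page cache.
 */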
/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within an RCU critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of a previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }
        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    return block;
}
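/*
 * RAM_SAVE_FLAG_CONTINUE is how the sender avoids repeating the block id
 * for consecutive pages of the same RAMBlock: when the flag is set,
 * ram_block_from_stream() reuses the block cached in the static 'block'
 * variable instead of reading a (length, idstr) pair from the stream.
 */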
static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: the byte the page is filled with; only zero is supported
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;
            /* uncompress() may fail in some cases, especially when the
             * page was dirtied while it was being compressed.  That is
             * not a problem, because the dirty page will be retransferred
             * and uncompress() won't corrupt the data in other pages.
             */
            uncompress((Bytef *)des, &pagesize,
                       (const Bytef *)param->compbuf, len);

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
static void wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
static void compress_threads_load_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
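/*
 * Hand-off between the loading thread and the decompression workers, as
 * implemented above: the loading thread looks for a worker whose 'done'
 * flag is set, copies the compressed data into that worker's compbuf under
 * param->mutex and signals param->cond; the worker inflates the buffer into
 * guest memory, sets 'done' again and signals decomp_done_cond.  If no
 * worker is idle, the loading thread waits on decomp_done_cond.
 */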
/**
 * ram_load_setup: Setup RAM for migration incoming side
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to receive the data
 * @opaque: RAMState pointer
 */
static int ram_load_setup(QEMUFile *f, void *opaque)
{
    xbzrle_load_setup();
    compress_threads_load_setup();
    ramblock_recv_map_init();
    return 0;
}
static int ram_load_cleanup(void *opaque)
{
    RAMBlock *rb;

    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

    RAMBLOCK_FOREACH(rb) {
        g_free(rb->receivedmap);
        rb->receivedmap = NULL;
    }
    return 0;
}
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was an error
 *
 * @mis: current migration incoming state
 *
 * Allocate the data structures etc needed by incoming migration with
 * postcopy-ram.  postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    unsigned long ram_pages = last_ram_page();

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile to receive the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);

        /*
         * If qemu file error, we should stop here, and then "addr"
         * may be invalid.
         */
        ret = qemu_file_get_error(f);
        if (ret) {
            break;
        }

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses, possibly smaller, target pages;
             * however, the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all target pages are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st target page within the host page */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        /* Detect for any possible file errors */
        if (!ret && qemu_file_get_error(f)) {
            ret = qemu_file_get_error(f);
        }

        if (!ret && place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block);
            }
        }
    }

    return ret;
}
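/*
 * Worked example for the host-page assembly done in ram_load_postcopy():
 * with a hugetlbfs-backed RAMBlock using 2MB host pages and a 4KB target
 * page size (an illustrative combination), 512 consecutive target pages are
 * accumulated in postcopy_host_page and only the last one sets place_needed,
 * at which point the whole 2MB page is placed atomically with
 * postcopy_place_page() (or postcopy_place_page_zero() if every target page
 * within it was zero).
 */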
static bool postcopy_is_advised(void)
{
    PostcopyState ps = postcopy_state_get();
    return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
}
static bool postcopy_is_running(void)
{
    PostcopyState ps = postcopy_state_get();
    return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
}
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0, invalid_flags = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If the system is running in postcopy mode, page inserts to host memory
     * must be atomic
     */
    bool postcopy_running = postcopy_is_running();
    /* ADVISE is earlier; it shows that the source has the postcopy capability on */
    bool postcopy_advised = postcopy_is_advised();

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    if (!migrate_use_compression()) {
        invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
    }
    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & invalid_flags) {
            if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
                error_report("Received an unexpected compressed page");
            }

            ret = -EINVAL;
            break;
        }

        if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            ramblock_recv_bitmap_set(block, host);
            trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);

                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_ZERO:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);
    return ret;
}
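/*
 * Each record consumed by the precopy loop in ram_load() starts with a be64
 * whose low bits (below TARGET_PAGE_MASK) carry the RAM_SAVE_FLAG_* bits and
 * whose upper bits carry the page offset (or the total RAM size, for
 * MEM_SIZE).  The payload that follows depends on the flag: a single fill
 * byte for ZERO, a full page for PAGE, a be32 length plus compressed data
 * for COMPRESS_PAGE, and an XBZRLE header plus delta for XBZRLE.  This just
 * mirrors the switch statement above.
 */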
static bool ram_has_postcopy(void *opaque)
{
    return migrate_postcopy_ram();
}
static SaveVMHandlers savevm_ram_handlers = {
    .save_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .has_postcopy = ram_has_postcopy,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .save_cleanup = ram_save_cleanup,
    .load_setup = ram_load_setup,
    .load_cleanup = ram_load_cleanup,
};
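/*
 * Broadly speaking, these handlers are driven by the generic savevm/migration
 * code: save_setup runs once before the first iteration, save_live_iterate is
 * called repeatedly while the guest keeps dirtying memory, and the two
 * save_live_complete_* hooks finish the precopy or postcopy phase
 * respectively; the load_* callbacks are their destination-side counterparts.
 */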
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}