/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2011-2015 Red Hat Inc
 *
 * Authors:
 *  Juan Quintela <quintela@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include <zlib.h>
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/colo.h"
/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h start with 0x100 next */
#define RAM_SAVE_FLAG_COMPRESS_PAGE    0x100
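
/*
 * Illustrative sketch, not part of the original file: every page that goes
 * on the wire is preceded by a big-endian 64-bit word that carries the page
 * offset inside its RAMBlock in the upper bits and a combination of the
 * RAM_SAVE_FLAG_* values in the low bits (see save_page_header() below).
 * The helper name example_page_header_word() is hypothetical and only
 * spells out that encoding.
 */
#if 0
static uint64_t example_page_header_word(ram_addr_t offset, bool same_block,
                                         bool is_zero)
{
    uint64_t header = offset;               /* page offset inside the block */

    if (same_block) {
        header |= RAM_SAVE_FLAG_CONTINUE;   /* no block name follows */
    }
    header |= is_zero ? RAM_SAVE_FLAG_COMPRESS  /* zero-page marker */
                      : RAM_SAVE_FLAG_PAGE;     /* raw page data follows */
    return header;
}
#endif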
static uint8_t *ZERO_TARGET_PAGE;

static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* Cache for XBZRLE, Protected by lock. */
    PageCache *cache;
    QemuMutex lock;
} XBZRLE;

/* buffer used for XBZRLE decoding */
static uint8_t *xbzrle_decoded_buf;
static void XBZRLE_cache_lock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_lock(&XBZRLE.lock);
}

static void XBZRLE_cache_unlock(void)
{
    if (migrate_use_xbzrle())
        qemu_mutex_unlock(&XBZRLE.lock);
}
/**
 * xbzrle_cache_resize: resize the xbzrle cache
 *
 * This function is called from qmp_migrate_set_cache_size in main
 * thread, possibly while a migration is in progress. A running
 * migration may be using the cache and might finish during this call,
 * hence changes to the cache are protected by XBZRLE.lock().
 *
 * Returns the new_size or negative in case of error.
 *
 * @new_size: new cache size
 */
int64_t xbzrle_cache_resize(int64_t new_size)
{
    PageCache *new_cache;
    int64_t ret;

    if (new_size < TARGET_PAGE_SIZE) {
        return -1;
    }

    XBZRLE_cache_lock();

    if (XBZRLE.cache != NULL) {
        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
            goto out_new_size;
        }
        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
                               TARGET_PAGE_SIZE);
        if (!new_cache) {
            error_report("Error creating cache");
            ret = -1;
            goto out;
        }

        cache_fini(XBZRLE.cache);
        XBZRLE.cache = new_cache;
    }

out_new_size:
    ret = pow2floor(new_size);
out:
    XBZRLE_cache_unlock();
    return ret;
}
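
/*
 * Usage sketch, not part of the original file: the cache size is rounded
 * down to a power of two of pages, so asking for 5 MiB on a 4 KiB-page
 * target ends up with a 4 MiB cache.  example_resize_xbzrle_cache() is a
 * hypothetical caller; the real entry point is qmp_migrate_set_cache_size().
 */
#if 0
static void example_resize_xbzrle_cache(void)
{
    int64_t got = xbzrle_cache_resize(5 * 1024 * 1024);

    if (got < 0) {
        error_report("resizing the XBZRLE cache failed");
    }
    /* otherwise got == pow2floor(5 MiB) == 4 MiB */
}
#endif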
/* State of RAM for migration */
struct RAMState {
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last offset we have sent data from */
    ram_addr_t last_offset;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* How many times we have synchronized the bitmap */
    uint64_t bitmap_sync_count;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at start_time */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since start_time */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* number of iterations at the beginning of period */
    uint64_t iterations_prev;
    /* Accounting fields */
    /* number of zero pages.  It used to be pages filled by the same char. */
    uint64_t zero_pages;
    /* number of normal transferred pages */
    uint64_t norm_pages;
    /* Iterations since start */
    uint64_t iterations;
};
typedef struct RAMState RAMState;

static RAMState ram_state;
/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    double xbzrle_cache_miss_rate;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}
uint64_t dup_mig_pages_transferred(void)
{
    return ram_state.zero_pages;
}

uint64_t norm_mig_pages_transferred(void)
{
    return ram_state.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

double xbzrle_mig_cache_miss_rate(void)
{
    return acct_info.xbzrle_cache_miss_rate;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}
static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock    *block;
    /* Current offset to search from */
    ram_addr_t   offset;
    /* Set once we wrap around */
    bool         complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

static struct BitmapRcu {
    struct rcu_head rcu;
    /* Main migration bitmap */
    unsigned long *bmap;
    /* bitmap of pages that haven't been sent even once
     * only maintained and used in postcopy at the moment
     * where it's used to send the dirtymap at the start
     * of the postcopy phase
     */
    unsigned long *unsentmap;
} *migration_bitmap_rcu;
struct CompressParam {
    bool done;
    bool quit;
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    RAMBlock *block;
    ram_addr_t offset;
};
typedef struct CompressParam CompressParam;

struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    void *des;
    uint8_t *compbuf;
    int len;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static bool compression_switch;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
                                ram_addr_t offset);

static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            qemu_mutex_unlock(&param->mutex);

            do_compress_ram_page(param->file, block, offset);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
static inline void terminate_compression_threads(void)
{
    int idx, thread_count;

    thread_count = migrate_compress_threads();

    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        comp_param[idx].quit = true;
        qemu_cond_signal(&comp_param[idx].cond);
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
void migrate_compress_threads_join(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    terminate_compression_threads();
    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(compress_threads + i);
        qemu_fclose(comp_param[i].file);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
void migrate_compress_threads_create(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    compression_switch = true;
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
/**
 * save_page_header: write page header to wire
 *
 * If this is the 1st block, it also writes the block identification
 *
 * Returns the number of bytes written
 *
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 *          in the lower bits, it contains flags
 */
static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
{
    size_t size, len;

    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
    }
    return size;
}
/**
 * mig_throttle_guest_down: throttle down the guest
 *
 * Reduce amount of guest cpu execution to hopefully slow down memory
 * writes. If guest dirty memory rate is reduced below the rate at
 * which we can transfer pages to the destination then we should be
 * able to complete migration. Some workloads dirty memory way too
 * fast and will not effectively converge, even with auto-converge.
 */
static void mig_throttle_guest_down(void)
{
    MigrationState *s = migrate_get_current();
    uint64_t pct_initial = s->parameters.cpu_throttle_initial;
    uint64_t pct_increment = s->parameters.cpu_throttle_increment;

    /* We have not started throttling yet. Let's start it. */
    if (!cpu_throttle_active()) {
        cpu_throttle_set(pct_initial);
    } else {
        /* Throttling already on, just increase the rate */
        cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
    }
}
/**
 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 *
 * @rs: current RAM state
 * @current_addr: address for the zero page
 *
 * Update the xbzrle cache to reflect a page that's been sent as all 0.
 * The important thing is that a stale (not-yet-0'd) page be replaced
 * by the new data.
 * As a bonus, if the page wasn't in the cache it gets added so that
 * when a small write is made into the 0'd page it gets XBZRLE sent.
 */
static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
{
    if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
        return;
    }

    /* We don't care if this fails to allocate a new cache page
     * as long as it updated an old one */
    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
                 rs->bitmap_sync_count);
}

#define ENCODING_FLAG_XBZRLE 0x1
/**
 * save_xbzrle_page: compress and send current page
 *
 * Returns: 1 means that we wrote the page
 *          0 means that page is identical to the one already sent
 *          -1 means that xbzrle would be longer than normal
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @current_data: pointer to the address of the page contents
 * @current_addr: addr of the page
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @last_stage: if we are at the completion stage
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_xbzrle_page(RAMState *rs, QEMUFile *f, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage,
                            uint64_t *bytes_transferred)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
        acct_info.xbzrle_cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             rs->bitmap_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        if (!last_stage) {
            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
            *current_data = prev_cached_page;
        }
        return -1;
    }

    /* we need to update the data in the cache, in order to get the same data */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_xbzrle;
    *bytes_transferred += bytes_xbzrle;

    return 1;
}
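
/*
 * Illustrative sketch, not part of the original file: an XBZRLE record on
 * the wire is the page header word, one sub-encoding flag byte, a big-endian
 * 16-bit payload length and then the encoded bytes; load_xbzrle() later in
 * this file parses exactly this layout.  The helper below is hypothetical
 * and only spells the framing out.
 */
#if 0
static void example_put_xbzrle_record(QEMUFile *f, RAMBlock *block,
                                      ram_addr_t offset,
                                      uint8_t *encoded, int encoded_len)
{
    save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);   /* 1 byte: sub-encoding flag */
    qemu_put_be16(f, encoded_len);            /* 2 bytes: payload length */
    qemu_put_buffer(f, encoded, encoded_len); /* the XBZRLE delta itself */
}
#endif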
/**
 * migration_bitmap_find_dirty: find the next dirty page from start
 *
 * Called with rcu_read_lock() to protect migration_bitmap
 *
 * Returns the byte offset within memory region of the start of a dirty page
 *
 * @rs: current RAM state
 * @rb: RAMBlock where to search for dirty pages
 * @start: starting address (typically so we can continue from previous page)
 * @ram_addr_abs: pointer into which to store the address of the dirty page
 *                within the global ram_addr space
 */
static inline
ram_addr_t migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
                                       ram_addr_t start,
                                       ram_addr_t *ram_addr_abs)
{
    unsigned long base = rb->offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t rb_size = rb->used_length;
    unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
    unsigned long *bitmap;

    unsigned long next;

    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    if (rs->ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(bitmap, size, nr);
    }

    *ram_addr_abs = next << TARGET_PAGE_BITS;
    return (next - base) << TARGET_PAGE_BITS;
}
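
/*
 * Illustrative sketch, hypothetical numbers, not part of the original file:
 * the migration bitmap is indexed by global page number, so RAMBlock offsets
 * are converted with TARGET_PAGE_BITS shifts in both directions, as
 * migration_bitmap_find_dirty() does above.
 */
#if 0
static void example_bitmap_indexing(void)
{
    ram_addr_t block_offset = 0x40000000;  /* made-up block->offset */
    ram_addr_t in_block = 0x2000;          /* 2 pages into the block (4 KiB) */

    unsigned long base = block_offset >> TARGET_PAGE_BITS;
    unsigned long nr = base + (in_block >> TARGET_PAGE_BITS);

    /* nr is the bit consulted in the bitmap; shifting it back by
     * TARGET_PAGE_BITS recovers the global ram_addr_t of the page. */
    ram_addr_t ram_addr_abs = (ram_addr_t)nr << TARGET_PAGE_BITS;
    (void)ram_addr_abs;
}
#endif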
static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
{
    bool ret;
    int nr = addr >> TARGET_PAGE_BITS;
    unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;

    ret = test_and_clear_bit(nr, bitmap);

    if (ret) {
        migration_dirty_pages--;
    }
    return ret;
}

static void migration_bitmap_sync_range(RAMState *rs, ram_addr_t start,
                                        ram_addr_t length)
{
    unsigned long *bitmap;
    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
    migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap,
                             start, length, &rs->num_dirty_pages_period);
}

static void migration_bitmap_sync_init(RAMState *rs)
{
    rs->time_last_bitmap_sync = 0;
    rs->bytes_xfer_prev = 0;
    rs->num_dirty_pages_period = 0;
    rs->xbzrle_cache_miss_prev = 0;
    rs->iterations_prev = 0;
}
/**
 * ram_pagesize_summary: calculate all the pagesizes of a VM
 *
 * Returns a summary bitmap of the page sizes of all RAMBlocks
 *
 * For VMs with just normal pages this is equivalent to the host page
 * size. If it's got some huge pages then it's the OR of all the
 * different page sizes.
 */
uint64_t ram_pagesize_summary(void)
{
    RAMBlock *block;
    uint64_t summary = 0;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        summary |= block->page_size;
    }

    return summary;
}
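
/*
 * Illustrative sketch, hypothetical values, not part of the original file:
 * with normal 4 KiB pages plus one RAMBlock backed by 2 MiB huge pages the
 * summary is simply the OR of both sizes, so both are visible at once.
 */
#if 0
static void example_pagesize_summary(void)
{
    uint64_t summary = 0;

    summary |= 4 * 1024;          /* 0x00001000 */
    summary |= 2 * 1024 * 1024;   /* 0x00200000 */
    /* summary == 0x00201000: both page sizes are represented */
}
#endif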
643 static void migration_bitmap_sync(RAMState
*rs
)
646 MigrationState
*s
= migrate_get_current();
648 uint64_t bytes_xfer_now
;
650 rs
->bitmap_sync_count
++;
652 if (!rs
->bytes_xfer_prev
) {
653 rs
->bytes_xfer_prev
= ram_bytes_transferred();
656 if (!rs
->time_last_bitmap_sync
) {
657 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
660 trace_migration_bitmap_sync_start();
661 memory_global_dirty_log_sync();
663 qemu_mutex_lock(&migration_bitmap_mutex
);
665 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
666 migration_bitmap_sync_range(rs
, block
->offset
, block
->used_length
);
669 qemu_mutex_unlock(&migration_bitmap_mutex
);
671 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
673 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
    /* more than 1 second = 1000 milliseconds */
676 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
677 if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time we
               were in this routine. If that happens twice, start or increase
               throttling */
683 bytes_xfer_now
= ram_bytes_transferred();
685 if (s
->dirty_pages_rate
&&
686 (rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
687 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
688 (rs
->dirty_rate_high_cnt
++ >= 2)) {
689 trace_migration_throttle();
690 rs
->dirty_rate_high_cnt
= 0;
691 mig_throttle_guest_down();
693 rs
->bytes_xfer_prev
= bytes_xfer_now
;
696 if (migrate_use_xbzrle()) {
697 if (rs
->iterations_prev
!= rs
->iterations
) {
698 acct_info
.xbzrle_cache_miss_rate
=
699 (double)(acct_info
.xbzrle_cache_miss
-
700 rs
->xbzrle_cache_miss_prev
) /
701 (rs
->iterations
- rs
->iterations_prev
);
703 rs
->iterations_prev
= rs
->iterations
;
704 rs
->xbzrle_cache_miss_prev
= acct_info
.xbzrle_cache_miss
;
706 s
->dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
707 / (end_time
- rs
->time_last_bitmap_sync
);
708 s
->dirty_bytes_rate
= s
->dirty_pages_rate
* TARGET_PAGE_SIZE
;
709 rs
->time_last_bitmap_sync
= end_time
;
710 rs
->num_dirty_pages_period
= 0;
712 s
->dirty_sync_count
= rs
->bitmap_sync_count
;
713 if (migrate_use_events()) {
714 qapi_event_send_migration_pass(rs
->bitmap_sync_count
, NULL
);
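
/*
 * Illustrative sketch, not part of the original file: the auto-converge
 * check in migration_bitmap_sync() compares the bytes dirtied during the
 * last period with the bytes actually transferred; dirtying more than half
 * of what was sent, a few periods in a row, starts or increases CPU
 * throttling.  The helper below is hypothetical and only restates that
 * condition.
 */
#if 0
static bool example_should_throttle(uint64_t dirtied_bytes,
                                    uint64_t transferred_bytes,
                                    int *high_cnt)
{
    if (dirtied_bytes > transferred_bytes / 2 && (*high_cnt)++ >= 2) {
        *high_cnt = 0;
        return true;    /* caller would invoke mig_throttle_guest_down() */
    }
    return false;
}
#endif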
/**
 * save_zero_page: send the zero page to the stream
 *
 * Returns the number of pages written.
 *
 * @rs: current RAM state
 * @f: QEMUFile where to send the data
 * @block: block that contains the page we want to send
 * @offset: offset inside the block for the page
 * @p: pointer to the page
 * @bytes_transferred: increase it with the number of transferred bytes
 */
static int save_zero_page(RAMState *rs, QEMUFile *f, RAMBlock *block,
                          ram_addr_t offset,
                          uint8_t *p, uint64_t *bytes_transferred)
{
    int pages = -1;

    if (is_zero_range(p, TARGET_PAGE_SIZE)) {
        rs->zero_pages++;
        *bytes_transferred += save_page_header(f, block,
                                               offset | RAM_SAVE_FLAG_COMPRESS);
        qemu_put_byte(f, 0);
        *bytes_transferred += 1;
        pages = 1;
    }

    return pages;
}

static void ram_release_pages(MigrationState *ms, const char *rbname,
                              uint64_t offset, int pages)
{
    if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
        return;
    }

    ram_discard_range(NULL, rbname, offset, pages << TARGET_PAGE_BITS);
}
759 * ram_save_page: send the given page to the stream
761 * Returns the number of pages written.
763 * >=0 - Number of pages written - this might legally be 0
764 * if xbzrle noticed the page was the same.
766 * @rs: current RAM state
767 * @ms: current migration state
768 * @f: QEMUFile where to send the data
769 * @block: block that contains the page we want to send
770 * @offset: offset inside the block for the page
771 * @last_stage: if we are at the completion stage
772 * @bytes_transferred: increase it with the number of transferred bytes
774 static int ram_save_page(RAMState
*rs
, MigrationState
*ms
, QEMUFile
*f
,
775 PageSearchStatus
*pss
, bool last_stage
,
776 uint64_t *bytes_transferred
)
780 ram_addr_t current_addr
;
783 bool send_async
= true;
784 RAMBlock
*block
= pss
->block
;
785 ram_addr_t offset
= pss
->offset
;
787 p
= block
->host
+ offset
;
    /* In doubt, send page as normal */
791 ret
= ram_control_save_page(f
, block
->offset
,
792 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
794 *bytes_transferred
+= bytes_xmit
;
800 current_addr
= block
->offset
+ offset
;
802 if (block
== rs
->last_sent_block
) {
803 offset
|= RAM_SAVE_FLAG_CONTINUE
;
805 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
806 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
807 if (bytes_xmit
> 0) {
809 } else if (bytes_xmit
== 0) {
814 pages
= save_zero_page(rs
, f
, block
, offset
, p
, bytes_transferred
);
816 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
817 * page would be stale
819 xbzrle_cache_zero_page(rs
, current_addr
);
820 ram_release_pages(ms
, block
->idstr
, pss
->offset
, pages
);
821 } else if (!rs
->ram_bulk_stage
&&
822 !migration_in_postcopy(ms
) && migrate_use_xbzrle()) {
823 pages
= save_xbzrle_page(rs
, f
, &p
, current_addr
, block
,
824 offset
, last_stage
, bytes_transferred
);
826 /* Can't send this cached data async, since the cache page
827 * might get updated before it gets to the wire
834 /* XBZRLE overflow or normal page */
836 *bytes_transferred
+= save_page_header(f
, block
,
837 offset
| RAM_SAVE_FLAG_PAGE
);
839 qemu_put_buffer_async(f
, p
, TARGET_PAGE_SIZE
,
840 migrate_release_ram() &
841 migration_in_postcopy(ms
));
843 qemu_put_buffer(f
, p
, TARGET_PAGE_SIZE
);
845 *bytes_transferred
+= TARGET_PAGE_SIZE
;
850 XBZRLE_cache_unlock();
855 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
858 int bytes_sent
, blen
;
859 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
861 bytes_sent
= save_page_header(f
, block
, offset
|
862 RAM_SAVE_FLAG_COMPRESS_PAGE
);
863 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
864 migrate_compress_level());
867 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
868 error_report("compressed data failed!");
871 ram_release_pages(migrate_get_current(), block
->idstr
,
872 offset
& TARGET_PAGE_MASK
, 1);
878 static uint64_t bytes_transferred
;
880 static void flush_compressed_data(QEMUFile
*f
)
882 int idx
, len
, thread_count
;
884 if (!migrate_use_compression()) {
887 thread_count
= migrate_compress_threads();
889 qemu_mutex_lock(&comp_done_lock
);
890 for (idx
= 0; idx
< thread_count
; idx
++) {
891 while (!comp_param
[idx
].done
) {
892 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
895 qemu_mutex_unlock(&comp_done_lock
);
897 for (idx
= 0; idx
< thread_count
; idx
++) {
898 qemu_mutex_lock(&comp_param
[idx
].mutex
);
899 if (!comp_param
[idx
].quit
) {
900 len
= qemu_put_qemu_file(f
, comp_param
[idx
].file
);
901 bytes_transferred
+= len
;
903 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
907 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
910 param
->block
= block
;
911 param
->offset
= offset
;
914 static int compress_page_with_multi_thread(RAMState
*rs
, QEMUFile
*f
,
915 RAMBlock
*block
, ram_addr_t offset
,
916 uint64_t *bytes_transferred
)
918 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
920 thread_count
= migrate_compress_threads();
921 qemu_mutex_lock(&comp_done_lock
);
923 for (idx
= 0; idx
< thread_count
; idx
++) {
924 if (comp_param
[idx
].done
) {
925 comp_param
[idx
].done
= false;
926 bytes_xmit
= qemu_put_qemu_file(f
, comp_param
[idx
].file
);
927 qemu_mutex_lock(&comp_param
[idx
].mutex
);
928 set_compress_params(&comp_param
[idx
], block
, offset
);
929 qemu_cond_signal(&comp_param
[idx
].cond
);
930 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
933 *bytes_transferred
+= bytes_xmit
;
940 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
943 qemu_mutex_unlock(&comp_done_lock
);
949 * ram_save_compressed_page: compress the given page and send it to the stream
951 * Returns the number of pages written.
953 * @rs: current RAM state
954 * @ms: current migration state
955 * @f: QEMUFile where to send the data
956 * @block: block that contains the page we want to send
957 * @offset: offset inside the block for the page
958 * @last_stage: if we are at the completion stage
959 * @bytes_transferred: increase it with the number of transferred bytes
961 static int ram_save_compressed_page(RAMState
*rs
, MigrationState
*ms
,
963 PageSearchStatus
*pss
, bool last_stage
,
964 uint64_t *bytes_transferred
)
967 uint64_t bytes_xmit
= 0;
970 RAMBlock
*block
= pss
->block
;
971 ram_addr_t offset
= pss
->offset
;
973 p
= block
->host
+ offset
;
975 ret
= ram_control_save_page(f
, block
->offset
,
976 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
978 *bytes_transferred
+= bytes_xmit
;
981 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
982 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
983 if (bytes_xmit
> 0) {
985 } else if (bytes_xmit
== 0) {
990 /* When starting the process of a new block, the first page of
991 * the block should be sent out before other pages in the same
992 * block, and all the pages in last block should have been sent
993 * out, keeping this order is important, because the 'cont' flag
994 * is used to avoid resending the block name.
996 if (block
!= rs
->last_sent_block
) {
997 flush_compressed_data(f
);
998 pages
= save_zero_page(rs
, f
, block
, offset
, p
, bytes_transferred
);
1000 /* Make sure the first page is sent out before other pages */
1001 bytes_xmit
= save_page_header(f
, block
, offset
|
1002 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1003 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
1004 migrate_compress_level());
1006 *bytes_transferred
+= bytes_xmit
+ blen
;
1010 qemu_file_set_error(f
, blen
);
1011 error_report("compressed data failed!");
1015 ram_release_pages(ms
, block
->idstr
, pss
->offset
, pages
);
1018 offset
|= RAM_SAVE_FLAG_CONTINUE
;
1019 pages
= save_zero_page(rs
, f
, block
, offset
, p
, bytes_transferred
);
1021 pages
= compress_page_with_multi_thread(rs
, f
, block
, offset
,
1024 ram_release_pages(ms
, block
->idstr
, pss
->offset
, pages
);
1033 * find_dirty_block: find the next dirty page and update any state
1034 * associated with the search process.
1036 * Returns if a page is found
1038 * @rs: current RAM state
1039 * @f: QEMUFile where to send the data
1040 * @pss: data about the state of the current dirty page scan
1041 * @again: set to false if the search has scanned the whole of RAM
1042 * @ram_addr_abs: pointer into which to store the address of the dirty page
1043 * within the global ram_addr space
1045 static bool find_dirty_block(RAMState
*rs
, QEMUFile
*f
, PageSearchStatus
*pss
,
1046 bool *again
, ram_addr_t
*ram_addr_abs
)
1048 pss
->offset
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->offset
,
1050 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1051 pss
->offset
>= rs
->last_offset
) {
1053 * We've been once around the RAM and haven't found anything.
1059 if (pss
->offset
>= pss
->block
->used_length
) {
1060 /* Didn't find anything in this RAM Block */
1062 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1064 /* Hit the end of the list */
1065 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1066 /* Flag that we've looped */
1067 pss
->complete_round
= true;
1068 rs
->ram_bulk_stage
= false;
1069 if (migrate_use_xbzrle()) {
1070 /* If xbzrle is on, stop using the data compression at this
1071 * point. In theory, xbzrle can do better than compression.
1073 flush_compressed_data(f
);
1074 compression_switch
= false;
1077 /* Didn't find anything this time, but try again on the new block */
1081 /* Can go around again, but... */
1083 /* We've found something so probably don't need to */
 * unqueue_page: gets a page off the queue
1091 * Helper for 'get_queued_page' - gets a page off the queue
1093 * Returns the block of the page (or NULL if none available)
1095 * @ms: current migration state
1096 * @offset: used to return the offset within the RAMBlock
1097 * @ram_addr_abs: pointer into which to store the address of the dirty page
1098 * within the global ram_addr space
1100 static RAMBlock
*unqueue_page(MigrationState
*ms
, ram_addr_t
*offset
,
1101 ram_addr_t
*ram_addr_abs
)
1103 RAMBlock
*block
= NULL
;
1105 qemu_mutex_lock(&ms
->src_page_req_mutex
);
1106 if (!QSIMPLEQ_EMPTY(&ms
->src_page_requests
)) {
1107 struct MigrationSrcPageRequest
*entry
=
1108 QSIMPLEQ_FIRST(&ms
->src_page_requests
);
1110 *offset
= entry
->offset
;
1111 *ram_addr_abs
= (entry
->offset
+ entry
->rb
->offset
) &
1114 if (entry
->len
> TARGET_PAGE_SIZE
) {
1115 entry
->len
-= TARGET_PAGE_SIZE
;
1116 entry
->offset
+= TARGET_PAGE_SIZE
;
1118 memory_region_unref(block
->mr
);
1119 QSIMPLEQ_REMOVE_HEAD(&ms
->src_page_requests
, next_req
);
1123 qemu_mutex_unlock(&ms
->src_page_req_mutex
);
 * get_queued_page: unqueue a page from the postcopy requests
1131 * Skips pages that are already sent (!dirty)
1133 * Returns if a queued page is found
1135 * @rs: current RAM state
1136 * @ms: current migration state
1137 * @pss: data about the state of the current dirty page scan
1138 * @ram_addr_abs: pointer into which to store the address of the dirty page
1139 * within the global ram_addr space
1141 static bool get_queued_page(RAMState
*rs
, MigrationState
*ms
,
1142 PageSearchStatus
*pss
,
1143 ram_addr_t
*ram_addr_abs
)
1150 block
= unqueue_page(ms
, &offset
, ram_addr_abs
);
1152 * We're sending this page, and since it's postcopy nothing else
1153 * will dirty it, and we must make sure it doesn't get sent again
1154 * even if this queue request was received after the background
1155 * search already sent it.
1158 unsigned long *bitmap
;
1159 bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1160 dirty
= test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
, bitmap
);
1162 trace_get_queued_page_not_dirty(
1163 block
->idstr
, (uint64_t)offset
,
1164 (uint64_t)*ram_addr_abs
,
1165 test_bit(*ram_addr_abs
>> TARGET_PAGE_BITS
,
1166 atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
));
1168 trace_get_queued_page(block
->idstr
,
1170 (uint64_t)*ram_addr_abs
);
1174 } while (block
&& !dirty
);
1178 * As soon as we start servicing pages out of order, then we have
1179 * to kill the bulk stage, since the bulk stage assumes
1180 * in (migration_bitmap_find_and_reset_dirty) that every page is
1181 * dirty, that's no longer true.
1183 rs
->ram_bulk_stage
= false;
1186 * We want the background search to continue from the queued page
1187 * since the guest is likely to want other pages near to the page
1188 * it just requested.
1191 pss
->offset
= offset
;
1198 * migration_page_queue_free: drop any remaining pages in the ram
1201 * It should be empty at the end anyway, but in error cases there may
 * be some left. In case there is any page left, we drop it.
1204 * @ms: current migration state
1206 void migration_page_queue_free(MigrationState
*ms
)
1208 struct MigrationSrcPageRequest
*mspr
, *next_mspr
;
1209 /* This queue generally should be empty - but in the case of a failed
1210 * migration might have some droppings in.
1213 QSIMPLEQ_FOREACH_SAFE(mspr
, &ms
->src_page_requests
, next_req
, next_mspr
) {
1214 memory_region_unref(mspr
->rb
->mr
);
1215 QSIMPLEQ_REMOVE_HEAD(&ms
->src_page_requests
, next_req
);
1222 * ram_save_queue_pages: queue the page for transmission
1224 * A request from postcopy destination for example.
1226 * Returns zero on success or negative on error
1228 * @ms: current migration state
1229 * @rbname: Name of the RAMBLock of the request. NULL means the
1230 * same that last one.
1231 * @start: starting address from the start of the RAMBlock
1232 * @len: length (in bytes) to send
1234 int ram_save_queue_pages(MigrationState
*ms
, const char *rbname
,
1235 ram_addr_t start
, ram_addr_t len
)
1239 ms
->postcopy_requests
++;
1242 /* Reuse last RAMBlock */
1243 ramblock
= ms
->last_req_rb
;
1247 * Shouldn't happen, we can't reuse the last RAMBlock if
1248 * it's the 1st request.
1250 error_report("ram_save_queue_pages no previous block");
1254 ramblock
= qemu_ram_block_by_name(rbname
);
1257 /* We shouldn't be asked for a non-existent RAMBlock */
1258 error_report("ram_save_queue_pages no block '%s'", rbname
);
1261 ms
->last_req_rb
= ramblock
;
1263 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1264 if (start
+len
> ramblock
->used_length
) {
1265 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1266 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1267 __func__
, start
, len
, ramblock
->used_length
);
1271 struct MigrationSrcPageRequest
*new_entry
=
1272 g_malloc0(sizeof(struct MigrationSrcPageRequest
));
1273 new_entry
->rb
= ramblock
;
1274 new_entry
->offset
= start
;
1275 new_entry
->len
= len
;
1277 memory_region_ref(ramblock
->mr
);
1278 qemu_mutex_lock(&ms
->src_page_req_mutex
);
1279 QSIMPLEQ_INSERT_TAIL(&ms
->src_page_requests
, new_entry
, next_req
);
1280 qemu_mutex_unlock(&ms
->src_page_req_mutex
);
1291 * ram_save_target_page: save one target page
1293 * Returns the number of pages written
1295 * @rs: current RAM state
1296 * @ms: current migration state
1297 * @f: QEMUFile where to send the data
1298 * @pss: data about the page we want to send
1299 * @last_stage: if we are at the completion stage
1300 * @bytes_transferred: increase it with the number of transferred bytes
1301 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
1303 static int ram_save_target_page(RAMState
*rs
, MigrationState
*ms
, QEMUFile
*f
,
1304 PageSearchStatus
*pss
,
1306 uint64_t *bytes_transferred
,
1307 ram_addr_t dirty_ram_abs
)
    /* Check if the page is dirty and if it is, send it */
1312 if (migration_bitmap_clear_dirty(dirty_ram_abs
)) {
1313 unsigned long *unsentmap
;
1314 if (compression_switch
&& migrate_use_compression()) {
1315 res
= ram_save_compressed_page(rs
, ms
, f
, pss
,
1319 res
= ram_save_page(rs
, ms
, f
, pss
, last_stage
,
1326 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1328 clear_bit(dirty_ram_abs
>> TARGET_PAGE_BITS
, unsentmap
);
1330 /* Only update last_sent_block if a block was actually sent; xbzrle
1331 * might have decided the page was identical so didn't bother writing
1335 rs
->last_sent_block
= pss
->block
;
1343 * ram_save_host_page: save a whole host page
1345 * Starting at *offset send pages up to the end of the current host
1346 * page. It's valid for the initial offset to point into the middle of
1347 * a host page in which case the remainder of the hostpage is sent.
1348 * Only dirty target pages are sent. Note that the host page size may
1349 * be a huge page for this block.
1351 * Returns the number of pages written or negative on error
1353 * @rs: current RAM state
1354 * @ms: current migration state
1355 * @f: QEMUFile where to send the data
1356 * @pss: data about the page we want to send
1357 * @last_stage: if we are at the completion stage
1358 * @bytes_transferred: increase it with the number of transferred bytes
1359 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1361 static int ram_save_host_page(RAMState
*rs
, MigrationState
*ms
, QEMUFile
*f
,
1362 PageSearchStatus
*pss
,
1364 uint64_t *bytes_transferred
,
1365 ram_addr_t dirty_ram_abs
)
1367 int tmppages
, pages
= 0;
1368 size_t pagesize
= qemu_ram_pagesize(pss
->block
);
1371 tmppages
= ram_save_target_page(rs
, ms
, f
, pss
, last_stage
,
1372 bytes_transferred
, dirty_ram_abs
);
1378 pss
->offset
+= TARGET_PAGE_SIZE
;
1379 dirty_ram_abs
+= TARGET_PAGE_SIZE
;
1380 } while (pss
->offset
& (pagesize
- 1));
1382 /* The offset we leave with is the last one we looked at */
1383 pss
->offset
-= TARGET_PAGE_SIZE
;
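
/*
 * Illustrative sketch, hypothetical sizes, not part of the original file:
 * with a 2 MiB huge-page RAMBlock and 4 KiB target pages the loop in
 * ram_save_host_page() above visits 512 target pages, stopping once
 * pss->offset crosses the host-page boundary (the pagesize - 1 mask).
 */
#if 0
static void example_host_page_walk(void)
{
    const size_t target_page = 4 * 1024;
    const size_t host_page = 2 * 1024 * 1024;
    size_t offset = 0;
    int visited = 0;

    do {
        visited++;
        offset += target_page;
    } while (offset & (host_page - 1));

    /* visited == 512 for these sizes */
}
#endif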
1388 * ram_find_and_save_block: finds a dirty page and sends it to f
1390 * Called within an RCU critical section.
1392 * Returns the number of pages written where zero means no dirty pages
1394 * @rs: current RAM state
1395 * @f: QEMUFile where to send the data
1396 * @last_stage: if we are at the completion stage
1397 * @bytes_transferred: increase it with the number of transferred bytes
1399 * On systems where host-page-size > target-page-size it will send all the
1400 * pages in a host page that are dirty.
1403 static int ram_find_and_save_block(RAMState
*rs
, QEMUFile
*f
, bool last_stage
,
1404 uint64_t *bytes_transferred
)
1406 PageSearchStatus pss
;
1407 MigrationState
*ms
= migrate_get_current();
1410 ram_addr_t dirty_ram_abs
; /* Address of the start of the dirty page in
1413 /* No dirty page as there is zero RAM */
1414 if (!ram_bytes_total()) {
1418 pss
.block
= rs
->last_seen_block
;
1419 pss
.offset
= rs
->last_offset
;
1420 pss
.complete_round
= false;
1423 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1428 found
= get_queued_page(rs
, ms
, &pss
, &dirty_ram_abs
);
1431 /* priority queue empty, so just search for something dirty */
1432 found
= find_dirty_block(rs
, f
, &pss
, &again
, &dirty_ram_abs
);
1436 pages
= ram_save_host_page(rs
, ms
, f
, &pss
,
1437 last_stage
, bytes_transferred
,
1440 } while (!pages
&& again
);
1442 rs
->last_seen_block
= pss
.block
;
1443 rs
->last_offset
= pss
.offset
;
1448 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1450 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1451 RAMState
*rs
= &ram_state
;
1454 rs
->zero_pages
+= pages
;
1456 rs
->norm_pages
+= pages
;
1457 bytes_transferred
+= size
;
1458 qemu_update_position(f
, size
);
1462 static ram_addr_t
ram_save_remaining(void)
1464 return migration_dirty_pages
;
1467 uint64_t ram_bytes_remaining(void)
1469 return ram_save_remaining() * TARGET_PAGE_SIZE
;
1472 uint64_t ram_bytes_transferred(void)
1474 return bytes_transferred
;
1477 uint64_t ram_bytes_total(void)
1483 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
)
1484 total
+= block
->used_length
;
1489 void free_xbzrle_decoded_buf(void)
1491 g_free(xbzrle_decoded_buf
);
1492 xbzrle_decoded_buf
= NULL
;
1495 static void migration_bitmap_free(struct BitmapRcu
*bmap
)
1498 g_free(bmap
->unsentmap
);
1502 static void ram_migration_cleanup(void *opaque
)
    /* caller must hold the iothread lock or be in a bh, so there is
     * no writing race against this migration_bitmap
     */
1507 struct BitmapRcu
*bitmap
= migration_bitmap_rcu
;
1508 atomic_rcu_set(&migration_bitmap_rcu
, NULL
);
1510 memory_global_dirty_log_stop();
1511 call_rcu(bitmap
, migration_bitmap_free
, rcu
);
1514 XBZRLE_cache_lock();
1516 cache_fini(XBZRLE
.cache
);
1517 g_free(XBZRLE
.encoded_buf
);
1518 g_free(XBZRLE
.current_buf
);
1519 g_free(ZERO_TARGET_PAGE
);
1520 XBZRLE
.cache
= NULL
;
1521 XBZRLE
.encoded_buf
= NULL
;
1522 XBZRLE
.current_buf
= NULL
;
1524 XBZRLE_cache_unlock();
1527 static void ram_state_reset(RAMState
*rs
)
1529 rs
->last_seen_block
= NULL
;
1530 rs
->last_sent_block
= NULL
;
1531 rs
->last_offset
= 0;
1532 rs
->last_version
= ram_list
.version
;
1533 rs
->ram_bulk_stage
= true;
1536 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1538 void migration_bitmap_extend(ram_addr_t old
, ram_addr_t
new)
1540 /* called in qemu main thread, so there is
1541 * no writing race against this migration_bitmap
1543 if (migration_bitmap_rcu
) {
1544 struct BitmapRcu
*old_bitmap
= migration_bitmap_rcu
, *bitmap
;
1545 bitmap
= g_new(struct BitmapRcu
, 1);
1546 bitmap
->bmap
= bitmap_new(new);
        /* prevent migration_bitmap content from being set bit
         * by migration_bitmap_sync_range() at the same time.
         * it is safe for migration if a migration_bitmap bit is cleared
         * at the same time.
         */
1553 qemu_mutex_lock(&migration_bitmap_mutex
);
1554 bitmap_copy(bitmap
->bmap
, old_bitmap
->bmap
, old
);
1555 bitmap_set(bitmap
->bmap
, old
, new - old
);
1557 /* We don't have a way to safely extend the sentmap
1558 * with RCU; so mark it as missing, entry to postcopy
1561 bitmap
->unsentmap
= NULL
;
1563 atomic_rcu_set(&migration_bitmap_rcu
, bitmap
);
1564 qemu_mutex_unlock(&migration_bitmap_mutex
);
1565 migration_dirty_pages
+= new - old
;
1566 call_rcu(old_bitmap
, migration_bitmap_free
, rcu
);
1571 * 'expected' is the value you expect the bitmap mostly to be full
1572 * of; it won't bother printing lines that are all this value.
1573 * If 'todump' is null the migration bitmap is dumped.
1575 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
)
1577 int64_t ram_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
1580 int64_t linelen
= 128;
1584 todump
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1587 for (cur
= 0; cur
< ram_pages
; cur
+= linelen
) {
1591 * Last line; catch the case where the line length
1592 * is longer than remaining ram
1594 if (cur
+ linelen
> ram_pages
) {
1595 linelen
= ram_pages
- cur
;
1597 for (curb
= 0; curb
< linelen
; curb
++) {
1598 bool thisbit
= test_bit(cur
+ curb
, todump
);
1599 linebuf
[curb
] = thisbit
? '1' : '.';
1600 found
= found
|| (thisbit
!= expected
);
1603 linebuf
[curb
] = '\0';
1604 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
1609 /* **** functions for postcopy ***** */
1611 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1613 struct RAMBlock
*block
;
1614 unsigned long *bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1616 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1617 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1618 unsigned long range
= first
+ (block
->used_length
>> TARGET_PAGE_BITS
);
1619 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, first
);
1621 while (run_start
< range
) {
1622 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1623 ram_discard_range(NULL
, block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1624 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1625 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1631 * postcopy_send_discard_bm_ram: discard a RAMBlock
1633 * Returns zero on success
1635 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1636 * Note: At this point the 'unsentmap' is the processed bitmap combined
1637 * with the dirtymap; so a '1' means it's either dirty or unsent.
1639 * @ms: current migration state
1640 * @pds: state for postcopy
1641 * @start: RAMBlock starting page
1642 * @length: RAMBlock size
1644 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1645 PostcopyDiscardState
*pds
,
1646 unsigned long start
,
1647 unsigned long length
)
1649 unsigned long end
= start
+ length
; /* one after the end */
1650 unsigned long current
;
1651 unsigned long *unsentmap
;
1653 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1654 for (current
= start
; current
< end
; ) {
1655 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1658 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1659 unsigned long discard_length
;
1662 discard_length
= end
- one
;
1664 discard_length
= zero
- one
;
1666 if (discard_length
) {
1667 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1669 current
= one
+ discard_length
;
1679 * postcopy_each_ram_send_discard: discard all RAMBlocks
1681 * Returns 0 for success or negative for error
1683 * Utility for the outgoing postcopy code.
1684 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1685 * passing it bitmap indexes and name.
1686 * (qemu_ram_foreach_block ends up passing unscaled lengths
1687 * which would mean postcopy code would have to deal with target page)
1689 * @ms: current migration state
1691 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1693 struct RAMBlock
*block
;
1696 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1697 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1698 PostcopyDiscardState
*pds
= postcopy_discard_send_init(ms
,
1703 * Postcopy sends chunks of bitmap over the wire, but it
1704 * just needs indexes at this point, avoids it having
1705 * target page specific code.
1707 ret
= postcopy_send_discard_bm_ram(ms
, pds
, first
,
1708 block
->used_length
>> TARGET_PAGE_BITS
);
1709 postcopy_discard_send_finish(ms
, pds
);
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1721 * Helper for postcopy_chunk_hostpages; it's called twice to
1722 * canonicalize the two bitmaps, that are similar, but one is
1725 * Postcopy requires that all target pages in a hostpage are dirty or
1726 * clean, not a mix. This function canonicalizes the bitmaps.
1728 * @ms: current migration state
1729 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1730 * otherwise we need to canonicalize partially dirty host pages
1731 * @block: block that contains the page we want to canonicalize
1732 * @pds: state for postcopy
1734 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1736 PostcopyDiscardState
*pds
)
1738 unsigned long *bitmap
;
1739 unsigned long *unsentmap
;
1740 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1741 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1742 unsigned long len
= block
->used_length
>> TARGET_PAGE_BITS
;
1743 unsigned long last
= first
+ (len
- 1);
1744 unsigned long run_start
;
1746 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1747 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1751 bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1752 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1755 /* Find a sent page */
1756 run_start
= find_next_zero_bit(unsentmap
, last
+ 1, first
);
1758 /* Find a dirty page */
1759 run_start
= find_next_bit(bitmap
, last
+ 1, first
);
1762 while (run_start
<= last
) {
1763 bool do_fixup
= false;
1764 unsigned long fixup_start_addr
;
1765 unsigned long host_offset
;
1768 * If the start of this run of pages is in the middle of a host
1769 * page, then we need to fixup this host page.
1771 host_offset
= run_start
% host_ratio
;
1774 run_start
-= host_offset
;
1775 fixup_start_addr
= run_start
;
1776 /* For the next pass */
1777 run_start
= run_start
+ host_ratio
;
1779 /* Find the end of this run */
1780 unsigned long run_end
;
1782 run_end
= find_next_bit(unsentmap
, last
+ 1, run_start
+ 1);
1784 run_end
= find_next_zero_bit(bitmap
, last
+ 1, run_start
+ 1);
1787 * If the end isn't at the start of a host page, then the
1788 * run doesn't finish at the end of a host page
1789 * and we need to discard.
1791 host_offset
= run_end
% host_ratio
;
1794 fixup_start_addr
= run_end
- host_offset
;
1796 * This host page has gone, the next loop iteration starts
1797 * from after the fixup
1799 run_start
= fixup_start_addr
+ host_ratio
;
1802 * No discards on this iteration, next loop starts from
1803 * next sent/dirty page
1805 run_start
= run_end
+ 1;
1812 /* Tell the destination to discard this page */
1813 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1814 /* For the unsent_pass we:
1815 * discard partially sent pages
1816 * For the !unsent_pass (dirty) we:
1817 * discard partially dirty pages that were sent
1818 * (any partially sent pages were already discarded
1819 * by the previous unsent_pass)
1821 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1825 /* Clean up the bitmap */
1826 for (page
= fixup_start_addr
;
1827 page
< fixup_start_addr
+ host_ratio
; page
++) {
1828 /* All pages in this host page are now not sent */
1829 set_bit(page
, unsentmap
);
1832 * Remark them as dirty, updating the count for any pages
1833 * that weren't previously dirty.
1835 migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1840 /* Find the next sent page for the next iteration */
1841 run_start
= find_next_zero_bit(unsentmap
, last
+ 1,
1844 /* Find the next dirty page for the next iteration */
1845 run_start
= find_next_bit(bitmap
, last
+ 1, run_start
);
 * postcopy_chunk_hostpages: discard any partially sent host page
1853 * Utility for the outgoing postcopy code.
1855 * Discard any partially sent host-page size chunks, mark any partially
1856 * dirty host-page size chunks as all dirty. In this case the host-page
1857 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1859 * Returns zero on success
1861 * @ms: current migration state
1863 static int postcopy_chunk_hostpages(MigrationState
*ms
)
1865 RAMState
*rs
= &ram_state
;
1866 struct RAMBlock
*block
;
1868 /* Easiest way to make sure we don't resume in the middle of a host-page */
1869 rs
->last_seen_block
= NULL
;
1870 rs
->last_sent_block
= NULL
;
1871 rs
->last_offset
= 0;
1873 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1874 unsigned long first
= block
->offset
>> TARGET_PAGE_BITS
;
1876 PostcopyDiscardState
*pds
=
1877 postcopy_discard_send_init(ms
, first
, block
->idstr
);
1879 /* First pass: Discard all partially sent host pages */
1880 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1882 * Second pass: Ensure that all partially dirty host pages are made
1885 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1887 postcopy_discard_send_finish(ms
, pds
);
1888 } /* ram_list loop */
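
/*
 * Illustrative sketch, made-up numbers, not part of the original file: with
 * 4 KiB target pages inside a 2 MiB host page, host_ratio is 512.  If a run
 * starts at target page 5 of a host page, the passes above round the fixup
 * back to the host-page boundary so a whole host page is always discarded
 * and re-marked dirty, never a fraction of one.
 */
#if 0
static void example_hostpage_fixup(void)
{
    const unsigned int host_ratio = 512;   /* 2 MiB / 4 KiB */
    unsigned long run_start = 1029;        /* global target-page number */
    unsigned long host_offset = run_start % host_ratio;   /* == 5 */

    if (host_offset) {
        run_start -= host_offset;          /* 1024: start of the host page */
    }
    /* run_start now names the first target page of the host page */
}
#endif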
1894 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1896 * Returns zero on success
1898 * Transmit the set of pages to be discarded after precopy to the target
1899 * these are pages that:
1900 * a) Have been previously transmitted but are now dirty again
1901 * b) Pages that have never been transmitted, this ensures that
1902 * any pages on the destination that have been mapped by background
1903 * tasks get discarded (transparent huge pages is the specific concern)
1904 * Hopefully this is pretty sparse
1906 * @ms: current migration state
1908 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1911 unsigned long *bitmap
, *unsentmap
;
1915 /* This should be our last sync, the src is now paused */
1916 migration_bitmap_sync(&ram_state
);
1918 unsentmap
= atomic_rcu_read(&migration_bitmap_rcu
)->unsentmap
;
1920 /* We don't have a safe way to resize the sentmap, so
1921 * if the bitmap was resized it will be NULL at this
1924 error_report("migration ram resized during precopy phase");
1929 /* Deal with TPS != HPS and huge pages */
1930 ret
= postcopy_chunk_hostpages(ms
);
1937 * Update the unsentmap to be unsentmap = unsentmap | dirty
1939 bitmap
= atomic_rcu_read(&migration_bitmap_rcu
)->bmap
;
1940 bitmap_or(unsentmap
, unsentmap
, bitmap
,
1941 last_ram_offset() >> TARGET_PAGE_BITS
);
1944 trace_ram_postcopy_send_discard_bitmap();
1945 #ifdef DEBUG_POSTCOPY
1946 ram_debug_dump_bitmap(unsentmap
, true);
1949 ret
= postcopy_each_ram_send_discard(ms
);
1956 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1958 * Returns zero on success
1960 * @mis: current migration incoming state
1961 * @rbname: name of the RAMBlock of the request. NULL means the
1962 * same that last one.
1963 * @start: RAMBlock starting page
1964 * @length: RAMBlock size
1966 int ram_discard_range(MigrationIncomingState
*mis
,
1968 uint64_t start
, size_t length
)
1972 trace_ram_discard_range(rbname
, start
, length
);
1975 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
1978 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
1982 ret
= ram_block_discard_range(rb
, start
, length
);
1990 static int ram_save_init_globals(RAMState
*rs
)
1992 int64_t ram_bitmap_pages
; /* Size of bitmap in pages, including gaps */
1994 rs
->dirty_rate_high_cnt
= 0;
1995 rs
->bitmap_sync_count
= 0;
1999 migration_bitmap_sync_init(rs
);
2000 qemu_mutex_init(&migration_bitmap_mutex
);
2002 if (migrate_use_xbzrle()) {
2003 XBZRLE_cache_lock();
2004 ZERO_TARGET_PAGE
= g_malloc0(TARGET_PAGE_SIZE
);
2005 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size() /
2008 if (!XBZRLE
.cache
) {
2009 XBZRLE_cache_unlock();
2010 error_report("Error creating cache");
2013 XBZRLE_cache_unlock();
2015 /* We prefer not to abort if there is no memory */
2016 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
2017 if (!XBZRLE
.encoded_buf
) {
2018 error_report("Error allocating encoded_buf");
2022 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
2023 if (!XBZRLE
.current_buf
) {
2024 error_report("Error allocating current_buf");
2025 g_free(XBZRLE
.encoded_buf
);
2026 XBZRLE
.encoded_buf
= NULL
;
2033 /* For memory_global_dirty_log_start below. */
2034 qemu_mutex_lock_iothread();
2036 qemu_mutex_lock_ramlist();
2038 bytes_transferred
= 0;
2039 ram_state_reset(rs
);
2041 migration_bitmap_rcu
= g_new0(struct BitmapRcu
, 1);
2042 /* Skip setting bitmap if there is no RAM */
2043 if (ram_bytes_total()) {
2044 ram_bitmap_pages
= last_ram_offset() >> TARGET_PAGE_BITS
;
2045 migration_bitmap_rcu
->bmap
= bitmap_new(ram_bitmap_pages
);
2046 bitmap_set(migration_bitmap_rcu
->bmap
, 0, ram_bitmap_pages
);
2048 if (migrate_postcopy_ram()) {
2049 migration_bitmap_rcu
->unsentmap
= bitmap_new(ram_bitmap_pages
);
2050 bitmap_set(migration_bitmap_rcu
->unsentmap
, 0, ram_bitmap_pages
);
2055 * Count the total number of pages used by ram blocks not including any
2056 * gaps due to alignment or unplugs.
2058 migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
2060 memory_global_dirty_log_start();
2061 migration_bitmap_sync(rs
);
2062 qemu_mutex_unlock_ramlist();
2063 qemu_mutex_unlock_iothread();
2070 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2071 * long-running RCU critical section. When rcu-reclaims in the code
2072 * start to become numerous it will be necessary to reduce the
2073 * granularity of these critical sections.
2077 * ram_save_setup: Setup RAM for migration
2079 * Returns zero to indicate success and negative for error
2081 * @f: QEMUFile where to send the data
2082 * @opaque: RAMState pointer
2084 static int ram_save_setup(QEMUFile
*f
, void *opaque
)
2086 RAMState
*rs
= opaque
;
2089 /* migration has already setup the bitmap, reuse it. */
2090 if (!migration_in_colo_state()) {
2091 if (ram_save_init_globals(rs
) < 0) {
2098 qemu_put_be64(f
, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
);
2100 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
2101 qemu_put_byte(f
, strlen(block
->idstr
));
2102 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, strlen(block
->idstr
));
2103 qemu_put_be64(f
, block
->used_length
);
2104 if (migrate_postcopy_ram() && block
->page_size
!= qemu_host_page_size
) {
2105 qemu_put_be64(f
, block
->page_size
);
2111 ram_control_before_iterate(f
, RAM_CONTROL_SETUP
);
2112 ram_control_after_iterate(f
, RAM_CONTROL_SETUP
);
2114 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2120 * ram_save_iterate: iterative stage for migration
2122 * Returns zero to indicate success and negative for error
2124 * @f: QEMUFile where to send the data
2125 * @opaque: RAMState pointer
2127 static int ram_save_iterate(QEMUFile
*f
, void *opaque
)
2129 RAMState
*rs
= opaque
;
2136 if (ram_list
.version
!= rs
->last_version
) {
2137 ram_state_reset(rs
);
2140 /* Read version before ram_list.blocks */
2143 ram_control_before_iterate(f
, RAM_CONTROL_ROUND
);
2145 t0
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2147 while ((ret
= qemu_file_rate_limit(f
)) == 0) {
2150 pages
= ram_find_and_save_block(rs
, f
, false, &bytes_transferred
);
        /* no more pages to send */
        /* we want to check in the 1st loop, just in case it was the 1st time
           and we had to sync the dirty bitmap.
           qemu_clock_get_ns() is a bit expensive, so we only check each some
           iterations
        */
2163 if ((i
& 63) == 0) {
2164 uint64_t t1
= (qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - t0
) / 1000000;
2165 if (t1
> MAX_WAIT
) {
2166 trace_ram_save_iterate_big_wait(t1
, i
);
2172 flush_compressed_data(f
);
2176 * Must occur before EOS (or any QEMUFile operation)
2177 * because of RDMA protocol.
2179 ram_control_after_iterate(f
, RAM_CONTROL_ROUND
);
2181 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2182 bytes_transferred
+= 8;
2184 ret
= qemu_file_get_error(f
);
2193 * ram_save_complete: function called to send the remaining amount of ram
2195 * Returns zero to indicate success
2197 * Called with iothread lock
2199 * @f: QEMUFile where to send the data
2200 * @opaque: RAMState pointer
static int ram_save_complete(QEMUFile *f, void *opaque)
{
    RAMState *rs = opaque;

    rcu_read_lock();

    if (!migration_in_postcopy(migrate_get_current())) {
        migration_bitmap_sync(rs);
    }

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* try transferring iterative blocks of memory */

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int pages;

        pages = ram_find_and_save_block(rs, f, !migration_in_colo_state(),
                                        &bytes_transferred);
        /* no more blocks to send */
        if (pages == 0) {
            break;
        }
    }

    flush_compressed_data(f);
    ram_control_after_iterate(f, RAM_CONTROL_FINISH);

    rcu_read_unlock();

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
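/*
 * Unlike the iterative stage, the completion stage ignores the rate limit and
 * loops until ram_find_and_save_block() reports no pages left.  The bitmap
 * sync is skipped when postcopy is already running, presumably because the
 * remaining pages are pulled on demand by the postcopy machinery rather than
 * pushed here.
 */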
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
                             uint64_t *non_postcopiable_pending,
                             uint64_t *postcopiable_pending)
{
    RAMState *rs = opaque;
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (!migration_in_postcopy(migrate_get_current()) &&
        remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        rcu_read_lock();
        migration_bitmap_sync(rs);
        rcu_read_unlock();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }

    /* We can do postcopy, and all the data is postcopiable */
    *postcopiable_pending += remaining_size;
}
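/*
 * The pending estimate is simply the number of remaining dirty pages times
 * TARGET_PAGE_SIZE.  When that drops below max_size the dirty bitmap is
 * re-synced (under the iothread lock) to refine the estimate before the
 * caller decides whether to enter the completion stage.
 */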
static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    unsigned int xh_len;
    int xh_flags;
    uint8_t *loaded_data;

    if (!xbzrle_decoded_buf) {
        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }
    loaded_data = xbzrle_decoded_buf;

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        error_report("Failed to load XBZRLE page - wrong compression!");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        error_report("Failed to load XBZRLE page - len overflow!");
        return -1;
    }

    /* load data and decode */
    qemu_get_buffer_in_place(f, &loaded_data, xh_len);

    /* decode RLE */
    if (xbzrle_decode_buffer(loaded_data, xh_len, host,
                             TARGET_PAGE_SIZE) == -1) {
        error_report("Failed to load XBZRLE page - decode error!");
        return -1;
    }

    return 0;
}
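/*
 * On the wire an XBZRLE page is a one byte header (ENCODING_FLAG_XBZRLE), a
 * be16 length, then the encoded data.  The decoder applies that delta on top
 * of whatever is already at @host, which must therefore still hold the
 * previously received version of the page.
 */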
/**
 * ram_block_from_stream: read a RAMBlock id from the migration stream
 *
 * Must be called from within a rcu critical section.
 *
 * Returns a pointer from within the RCU-protected ram_list.
 *
 * @f: QEMUFile where to read the data from
 * @flags: Page flags (mostly to see if it's a continuation of previous block)
 */
static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            error_report("Ack, bad migration stream!");
            return NULL;
        }

        return block;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    block = qemu_ram_block_by_name(id);
    if (!block) {
        error_report("Can't find block %s", id);
        return NULL;
    }

    return block;
}
static inline void *host_from_ram_block_offset(RAMBlock *block,
                                               ram_addr_t offset)
{
    if (!offset_in_ramblock(block, offset)) {
        return NULL;
    }

    return block->host + offset;
}
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
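/*
 * Writing zeroes into a page that already reads as zero would needlessly
 * dirty (and, for untouched memory, allocate) it on the destination, so the
 * memset is skipped when @ch is zero and the range is already zero.
 */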
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;
            /* uncompress() can fail, especially when the page was dirtied
             * while it was being compressed.  That is not a problem because
             * the dirty page will be retransferred and uncompress() won't
             * break the data in other pages.
             */
            uncompress((Bytef *)des, &pagesize,
                       (const Bytef *)param->compbuf, len);

            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
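/*
 * Hand-off protocol for the decompress workers: each thread sleeps on its own
 * param->cond until decompress_data_with_multi_threads() queues a buffer,
 * decompresses it straight into guest memory (param->des), then marks itself
 * idle with param->done and signals the shared decomp_done_cond under
 * decomp_done_lock.
 */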
static void wait_for_decompress_done(void)
{
    int idx, thread_count;

    if (!migrate_use_compression()) {
        return;
    }

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!decomp_param[idx].done) {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
void migrate_decompress_threads_create(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    decompress_threads = g_new0(QemuThread, thread_count);
    decomp_param = g_new0(DecompressParam, thread_count);
    qemu_mutex_init(&decomp_done_lock);
    qemu_cond_init(&decomp_done_cond);
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_init(&decomp_param[i].mutex);
        qemu_cond_init(&decomp_param[i].cond);
        decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
        decomp_param[i].done = true;
        decomp_param[i].quit = false;
        qemu_thread_create(decompress_threads + i, "decompress",
                           do_data_decompress, decomp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
}
void migrate_decompress_threads_join(void)
{
    int i, thread_count;

    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        g_free(decomp_param[i].compbuf);
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
}
static void decompress_data_with_multi_threads(QEMUFile *f,
                                               void *host, int len)
{
    int idx, thread_count;

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    while (true) {
        for (idx = 0; idx < thread_count; idx++) {
            if (decomp_param[idx].done) {
                decomp_param[idx].done = false;
                qemu_mutex_lock(&decomp_param[idx].mutex);
                qemu_get_buffer(f, decomp_param[idx].compbuf, len);
                decomp_param[idx].des = host;
                decomp_param[idx].len = len;
                qemu_cond_signal(&decomp_param[idx].cond);
                qemu_mutex_unlock(&decomp_param[idx].mutex);
                break;
            }
        }
        if (idx < thread_count) {
            break;
        } else {
            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
        }
    }
    qemu_mutex_unlock(&decomp_done_lock);
}
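/*
 * Dispatch side of the hand-off: scan for an idle worker (done == true), copy
 * the compressed bytes into its compbuf and wake it; if every worker is busy,
 * wait on decomp_done_cond until one of them finishes and retry.
 */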
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative in case of error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram.  postcopy-ram's similarly named
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    return postcopy_ram_incoming_init(mis, ram_pages);
}
/**
 * ram_load_postcopy: load a page in postcopy case
 *
 * Returns 0 for success or -errno in case of error
 *
 * Called in postcopy mode by ram_load().
 * rcu_read_lock is taken prior to this being called.
 *
 * @f: QEMUFile where to receive the data from
 */
static int ram_load_postcopy(QEMUFile *f)
{
    int flags = 0, ret = 0;
    bool place_needed = false;
    bool matching_page_sizes = false;
    MigrationIncomingState *mis = migration_incoming_get_current();
    /* Temporary page that is later 'placed' */
    void *postcopy_host_page = postcopy_get_tmp_page(mis);
    void *last_host = NULL;
    bool all_zero = false;

    while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr;
        void *host = NULL;
        void *page_buffer = NULL;
        void *place_source = NULL;
        RAMBlock *block = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        trace_ram_load_postcopy_loop((uint64_t)addr, flags);
        place_needed = false;
        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
            block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
            /*
             * Postcopy requires that we place whole host pages atomically;
             * these may be huge pages for RAMBlocks that are backed by
             * hugetlbfs.
             * To make it atomic, the data is read into a temporary page
             * that's moved into place later.
             * The migration protocol uses (possibly smaller) target pages;
             * however the source ensures it always sends all the components
             * of a host page in order.
             */
            page_buffer = postcopy_host_page +
                          ((uintptr_t)host & (block->page_size - 1));
            /* If all TP are zero then we can optimise the place */
            if (!((uintptr_t)host & (block->page_size - 1))) {
                all_zero = true;
            } else {
                /* not the 1st TP within the HP */
                if (host != (last_host + TARGET_PAGE_SIZE)) {
                    error_report("Non-sequential target page %p/%p",
                                 host, last_host);
                    ret = -EINVAL;
                    break;
                }
            }

            /*
             * If it's the last part of a host page then we place the host
             * page
             */
            place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
                            (block->page_size - 1)) == 0;
            place_source = postcopy_host_page;
        }
        last_host = host;

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            memset(page_buffer, ch, TARGET_PAGE_SIZE);
            if (ch) {
                all_zero = false;
            }
            break;

        case RAM_SAVE_FLAG_PAGE:
            all_zero = false;
            if (!place_needed || !matching_page_sizes) {
                qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
            } else {
                /* Avoids the qemu_file copy during postcopy, which is
                 * going to do a copy later; can only do it when we
                 * do this read in one go (matching page sizes)
                 */
                qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
                                         TARGET_PAGE_SIZE);
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            error_report("Unknown combination of migration flags: %#x"
                         " (postcopy mode)", flags);
            ret = -EINVAL;
        }

        if (place_needed) {
            /* This gets called at the last target page in the host page */
            void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;

            if (all_zero) {
                ret = postcopy_place_page_zero(mis, place_dest,
                                               block->page_size);
            } else {
                ret = postcopy_place_page(mis, place_dest,
                                          place_source, block->page_size);
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    return ret;
}
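/*
 * Summary of the placement logic above: target pages belonging to one host
 * page are accumulated in postcopy_host_page and only installed into guest
 * memory once the final target page of that host page has been read, so the
 * destination only ever sees whole, atomically placed host pages.  If every
 * target page of the host page was zero, the cheaper
 * postcopy_place_page_zero() path is used instead.
 */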
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    int flags = 0, ret = 0;
    static uint64_t seq_iter;
    int len = 0;
    /*
     * If system is running in postcopy mode, page inserts to host memory must
     * be atomic
     */
    bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
    /* ADVISE is earlier, it shows the source has the postcopy capability on */
    bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;

    seq_iter++;

    if (version_id != 4) {
        ret = -EINVAL;
    }

    /* This RCU critical section can be very long running.
     * When RCU reclaims in the code start to become numerous,
     * it will be necessary to reduce the granularity of this
     * critical section.
     */
    rcu_read_lock();

    if (postcopy_running) {
        ret = ram_load_postcopy(f);
    }

    while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
        ram_addr_t addr, total_ram_bytes;
        void *host = NULL;
        uint8_t ch;

        addr = qemu_get_be64(f);
        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
                     RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
            RAMBlock *block = ram_block_from_stream(f, flags);

            host = host_from_ram_block_offset(block, addr);
            if (!host) {
                error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
        }

        switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
        case RAM_SAVE_FLAG_MEM_SIZE:
            /* Synchronize RAM block list */
            total_ram_bytes = addr;
            while (!ret && total_ram_bytes) {
                RAMBlock *block;
                char id[256];
                ram_addr_t length;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                block = qemu_ram_block_by_name(id);
                if (block) {
                    if (length != block->used_length) {
                        Error *local_err = NULL;

                        ret = qemu_ram_resize(block, length,
                                              &local_err);
                        if (local_err) {
                            error_report_err(local_err);
                        }
                    }
                    /* For postcopy we need to check hugepage sizes match */
                    if (postcopy_advised &&
                        block->page_size != qemu_host_page_size) {
                        uint64_t remote_page_size = qemu_get_be64(f);
                        if (remote_page_size != block->page_size) {
                            error_report("Mismatched RAM page size %s "
                                         "(local) %zd != %" PRId64,
                                         id, block->page_size,
                                         remote_page_size);
                            ret = -EINVAL;
                        }
                    }
                    ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
                                          block->idstr);
                } else {
                    error_report("Unknown ramblock \"%s\", cannot "
                                 "accept migration", id);
                    ret = -EINVAL;
                }

                total_ram_bytes -= length;
            }
            break;

        case RAM_SAVE_FLAG_COMPRESS:
            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_PAGE:
            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
            break;

        case RAM_SAVE_FLAG_COMPRESS_PAGE:
            len = qemu_get_be32(f);
            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
                error_report("Invalid compressed data length: %d", len);
                ret = -EINVAL;
                break;
            }
            decompress_data_with_multi_threads(f, host, len);
            break;

        case RAM_SAVE_FLAG_XBZRLE:
            if (load_xbzrle(f, addr, host) < 0) {
                error_report("Failed to decompress XBZRLE page at "
                             RAM_ADDR_FMT, addr);
                ret = -EINVAL;
                break;
            }
            break;
        case RAM_SAVE_FLAG_EOS:
            /* normal exit */
            break;
        default:
            if (flags & RAM_SAVE_FLAG_HOOK) {
                ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
            } else {
                error_report("Unknown combination of migration flags: %#x",
                             flags);
                ret = -EINVAL;
            }
        }
        if (!ret) {
            ret = qemu_file_get_error(f);
        }
    }

    wait_for_decompress_done();
    rcu_read_unlock();
    trace_ram_load_complete(ret, seq_iter);

    return ret;
}
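/*
 * ram_load() above is the precopy-side counterpart of ram_save_*: each chunk
 * starts with a be64 that combines a page-aligned address with flag bits in
 * the low TARGET_PAGE_BITS, and the flag decides whether what follows is a
 * block list (MEM_SIZE), a fill byte (COMPRESS), a raw page (PAGE), zlib data
 * (COMPRESS_PAGE) or an XBZRLE delta, until EOS is seen.
 */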
static SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete_postcopy = ram_save_complete,
    .save_live_complete_precopy = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cleanup = ram_migration_cleanup,
};
void ram_mig_init(void)
{
    qemu_mutex_init(&XBZRLE.lock);
    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
}
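/*
 * The version number registered here (4) has to match the version_id check
 * at the top of ram_load(); bumping one without the other would make every
 * incoming stream fail with -EINVAL.
 */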