4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
39 #include "migration/migration.h"
40 #include "postcopy-ram.h"
41 #include "exec/address-spaces.h"
42 #include "migration/page_cache.h"
43 #include "qemu/error-report.h"
45 #include "exec/ram_addr.h"
46 #include "qemu/rcu_queue.h"
47 #include "migration/colo.h"
49 /***********************************************************/
50 /* ram save/restore */
52 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
53 * worked for pages that were filled with the same char. We switched
54 * it to only search for the zero value. And to avoid confusion with
55 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
58 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
59 #define RAM_SAVE_FLAG_ZERO 0x02
60 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
61 #define RAM_SAVE_FLAG_PAGE 0x08
62 #define RAM_SAVE_FLAG_EOS 0x10
63 #define RAM_SAVE_FLAG_CONTINUE 0x20
64 #define RAM_SAVE_FLAG_XBZRLE 0x40
65 /* 0x80 is reserved in migration.h start with 0x100 next */
66 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
68 static uint8_t *ZERO_TARGET_PAGE
;
70 static inline bool is_zero_range(uint8_t *p
, uint64_t size
)
72 return buffer_is_zero(p
, size
);
75 /* struct contains XBZRLE cache and a static page
76 used by the compression */
78 /* buffer used for XBZRLE encoding */
80 /* buffer for storing page content */
82 /* Cache for XBZRLE, Protected by lock. */
87 /* buffer used for XBZRLE decoding */
88 static uint8_t *xbzrle_decoded_buf
;
90 static void XBZRLE_cache_lock(void)
92 if (migrate_use_xbzrle())
93 qemu_mutex_lock(&XBZRLE
.lock
);
96 static void XBZRLE_cache_unlock(void)
98 if (migrate_use_xbzrle())
99 qemu_mutex_unlock(&XBZRLE
.lock
);
103 * xbzrle_cache_resize: resize the xbzrle cache
105 * This function is called from qmp_migrate_set_cache_size in main
106 * thread, possibly while a migration is in progress. A running
107 * migration may be using the cache and might finish during this call,
108 * hence changes to the cache are protected by XBZRLE.lock().
110 * Returns the new_size or negative in case of error.
112 * @new_size: new cache size
114 int64_t xbzrle_cache_resize(int64_t new_size
)
116 PageCache
*new_cache
;
119 if (new_size
< TARGET_PAGE_SIZE
) {
125 if (XBZRLE
.cache
!= NULL
) {
126 if (pow2floor(new_size
) == migrate_xbzrle_cache_size()) {
129 new_cache
= cache_init(new_size
/ TARGET_PAGE_SIZE
,
132 error_report("Error creating cache");
137 cache_fini(XBZRLE
.cache
);
138 XBZRLE
.cache
= new_cache
;
142 ret
= pow2floor(new_size
);
144 XBZRLE_cache_unlock();
149 * An outstanding page request, on the source, having been received
152 struct RAMSrcPageRequest
{
157 QSIMPLEQ_ENTRY(RAMSrcPageRequest
) next_req
;
160 /* State of RAM for migration */
162 /* QEMUFile used for this migration */
164 /* Last block that we have visited searching for dirty pages */
165 RAMBlock
*last_seen_block
;
166 /* Last block from where we have sent data */
167 RAMBlock
*last_sent_block
;
168 /* Last dirty target page we have sent */
169 ram_addr_t last_page
;
170 /* last ram version we have seen */
171 uint32_t last_version
;
172 /* We are in the first round */
174 /* How many times we have dirty too many pages */
175 int dirty_rate_high_cnt
;
176 /* How many times we have synchronized the bitmap */
177 uint64_t bitmap_sync_count
;
178 /* these variables are used for bitmap sync */
179 /* last time we did a full bitmap_sync */
180 int64_t time_last_bitmap_sync
;
181 /* bytes transferred at start_time */
182 uint64_t bytes_xfer_prev
;
183 /* number of dirty pages since start_time */
184 uint64_t num_dirty_pages_period
;
185 /* xbzrle misses since the beginning of the period */
186 uint64_t xbzrle_cache_miss_prev
;
187 /* number of iterations at the beginning of period */
188 uint64_t iterations_prev
;
189 /* Accounting fields */
190 /* number of zero pages. It used to be pages filled by the same char. */
192 /* number of normal transferred pages */
194 /* Iterations since start */
196 /* xbzrle transmitted bytes. Notice that this is with
197 * compression, they can't be calculated from the pages */
198 uint64_t xbzrle_bytes
;
199 /* xbzrle transmmited pages */
200 uint64_t xbzrle_pages
;
201 /* xbzrle number of cache miss */
202 uint64_t xbzrle_cache_miss
;
203 /* xbzrle miss rate */
204 double xbzrle_cache_miss_rate
;
205 /* xbzrle number of overflows */
206 uint64_t xbzrle_overflows
;
207 /* number of dirty bits in the bitmap */
208 uint64_t migration_dirty_pages
;
209 /* total number of bytes transferred */
210 uint64_t bytes_transferred
;
211 /* number of dirtied pages in the last second */
212 uint64_t dirty_pages_rate
;
213 /* Count of requests incoming from destination */
214 uint64_t postcopy_requests
;
215 /* protects modification of the bitmap */
216 QemuMutex bitmap_mutex
;
217 /* The RAMBlock used in the last src_page_requests */
218 RAMBlock
*last_req_rb
;
219 /* Queue of outstanding page requests from the destination */
220 QemuMutex src_page_req_mutex
;
221 QSIMPLEQ_HEAD(src_page_requests
, RAMSrcPageRequest
) src_page_requests
;
223 typedef struct RAMState RAMState
;
225 static RAMState ram_state
;
227 uint64_t dup_mig_pages_transferred(void)
229 return ram_state
.zero_pages
;
232 uint64_t norm_mig_pages_transferred(void)
234 return ram_state
.norm_pages
;
237 uint64_t xbzrle_mig_bytes_transferred(void)
239 return ram_state
.xbzrle_bytes
;
242 uint64_t xbzrle_mig_pages_transferred(void)
244 return ram_state
.xbzrle_pages
;
247 uint64_t xbzrle_mig_pages_cache_miss(void)
249 return ram_state
.xbzrle_cache_miss
;
252 double xbzrle_mig_cache_miss_rate(void)
254 return ram_state
.xbzrle_cache_miss_rate
;
257 uint64_t xbzrle_mig_pages_overflow(void)
259 return ram_state
.xbzrle_overflows
;
262 uint64_t ram_bytes_transferred(void)
264 return ram_state
.bytes_transferred
;
267 uint64_t ram_bytes_remaining(void)
269 return ram_state
.migration_dirty_pages
* TARGET_PAGE_SIZE
;
272 uint64_t ram_dirty_sync_count(void)
274 return ram_state
.bitmap_sync_count
;
277 uint64_t ram_dirty_pages_rate(void)
279 return ram_state
.dirty_pages_rate
;
282 uint64_t ram_postcopy_requests(void)
284 return ram_state
.postcopy_requests
;
287 /* used by the search for pages to send */
288 struct PageSearchStatus
{
289 /* Current block being searched */
291 /* Current page to search from */
293 /* Set once we wrap around */
296 typedef struct PageSearchStatus PageSearchStatus
;
298 struct CompressParam
{
307 typedef struct CompressParam CompressParam
;
309 struct DecompressParam
{
318 typedef struct DecompressParam DecompressParam
;
320 static CompressParam
*comp_param
;
321 static QemuThread
*compress_threads
;
322 /* comp_done_cond is used to wake up the migration thread when
323 * one of the compression threads has finished the compression.
324 * comp_done_lock is used to co-work with comp_done_cond.
326 static QemuMutex comp_done_lock
;
327 static QemuCond comp_done_cond
;
328 /* The empty QEMUFileOps will be used by file in CompressParam */
329 static const QEMUFileOps empty_ops
= { };
331 static DecompressParam
*decomp_param
;
332 static QemuThread
*decompress_threads
;
333 static QemuMutex decomp_done_lock
;
334 static QemuCond decomp_done_cond
;
336 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
339 static void *do_data_compress(void *opaque
)
341 CompressParam
*param
= opaque
;
345 qemu_mutex_lock(¶m
->mutex
);
346 while (!param
->quit
) {
348 block
= param
->block
;
349 offset
= param
->offset
;
351 qemu_mutex_unlock(¶m
->mutex
);
353 do_compress_ram_page(param
->file
, block
, offset
);
355 qemu_mutex_lock(&comp_done_lock
);
357 qemu_cond_signal(&comp_done_cond
);
358 qemu_mutex_unlock(&comp_done_lock
);
360 qemu_mutex_lock(¶m
->mutex
);
362 qemu_cond_wait(¶m
->cond
, ¶m
->mutex
);
365 qemu_mutex_unlock(¶m
->mutex
);
370 static inline void terminate_compression_threads(void)
372 int idx
, thread_count
;
374 thread_count
= migrate_compress_threads();
376 for (idx
= 0; idx
< thread_count
; idx
++) {
377 qemu_mutex_lock(&comp_param
[idx
].mutex
);
378 comp_param
[idx
].quit
= true;
379 qemu_cond_signal(&comp_param
[idx
].cond
);
380 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
384 void migrate_compress_threads_join(void)
388 if (!migrate_use_compression()) {
391 terminate_compression_threads();
392 thread_count
= migrate_compress_threads();
393 for (i
= 0; i
< thread_count
; i
++) {
394 qemu_thread_join(compress_threads
+ i
);
395 qemu_fclose(comp_param
[i
].file
);
396 qemu_mutex_destroy(&comp_param
[i
].mutex
);
397 qemu_cond_destroy(&comp_param
[i
].cond
);
399 qemu_mutex_destroy(&comp_done_lock
);
400 qemu_cond_destroy(&comp_done_cond
);
401 g_free(compress_threads
);
403 compress_threads
= NULL
;
407 void migrate_compress_threads_create(void)
411 if (!migrate_use_compression()) {
414 thread_count
= migrate_compress_threads();
415 compress_threads
= g_new0(QemuThread
, thread_count
);
416 comp_param
= g_new0(CompressParam
, thread_count
);
417 qemu_cond_init(&comp_done_cond
);
418 qemu_mutex_init(&comp_done_lock
);
419 for (i
= 0; i
< thread_count
; i
++) {
420 /* comp_param[i].file is just used as a dummy buffer to save data,
421 * set its ops to empty.
423 comp_param
[i
].file
= qemu_fopen_ops(NULL
, &empty_ops
);
424 comp_param
[i
].done
= true;
425 comp_param
[i
].quit
= false;
426 qemu_mutex_init(&comp_param
[i
].mutex
);
427 qemu_cond_init(&comp_param
[i
].cond
);
428 qemu_thread_create(compress_threads
+ i
, "compress",
429 do_data_compress
, comp_param
+ i
,
430 QEMU_THREAD_JOINABLE
);
435 * save_page_header: write page header to wire
437 * If this is the 1st block, it also writes the block identification
439 * Returns the number of bytes written
441 * @f: QEMUFile where to send the data
442 * @block: block that contains the page we want to send
443 * @offset: offset inside the block for the page
444 * in the lower bits, it contains flags
446 static size_t save_page_header(RAMState
*rs
, QEMUFile
*f
, RAMBlock
*block
,
451 if (block
== rs
->last_sent_block
) {
452 offset
|= RAM_SAVE_FLAG_CONTINUE
;
454 qemu_put_be64(f
, offset
);
457 if (!(offset
& RAM_SAVE_FLAG_CONTINUE
)) {
458 len
= strlen(block
->idstr
);
459 qemu_put_byte(f
, len
);
460 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, len
);
462 rs
->last_sent_block
= block
;
468 * mig_throttle_guest_down: throotle down the guest
470 * Reduce amount of guest cpu execution to hopefully slow down memory
471 * writes. If guest dirty memory rate is reduced below the rate at
472 * which we can transfer pages to the destination then we should be
473 * able to complete migration. Some workloads dirty memory way too
474 * fast and will not effectively converge, even with auto-converge.
476 static void mig_throttle_guest_down(void)
478 MigrationState
*s
= migrate_get_current();
479 uint64_t pct_initial
= s
->parameters
.cpu_throttle_initial
;
480 uint64_t pct_icrement
= s
->parameters
.cpu_throttle_increment
;
482 /* We have not started throttling yet. Let's start it. */
483 if (!cpu_throttle_active()) {
484 cpu_throttle_set(pct_initial
);
486 /* Throttling already on, just increase the rate */
487 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement
);
492 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
494 * @rs: current RAM state
495 * @current_addr: address for the zero page
497 * Update the xbzrle cache to reflect a page that's been sent as all 0.
498 * The important thing is that a stale (not-yet-0'd) page be replaced
500 * As a bonus, if the page wasn't in the cache it gets added so that
501 * when a small write is made into the 0'd page it gets XBZRLE sent.
503 static void xbzrle_cache_zero_page(RAMState
*rs
, ram_addr_t current_addr
)
505 if (rs
->ram_bulk_stage
|| !migrate_use_xbzrle()) {
509 /* We don't care if this fails to allocate a new cache page
510 * as long as it updated an old one */
511 cache_insert(XBZRLE
.cache
, current_addr
, ZERO_TARGET_PAGE
,
512 rs
->bitmap_sync_count
);
515 #define ENCODING_FLAG_XBZRLE 0x1
518 * save_xbzrle_page: compress and send current page
520 * Returns: 1 means that we wrote the page
521 * 0 means that page is identical to the one already sent
522 * -1 means that xbzrle would be longer than normal
524 * @rs: current RAM state
525 * @current_data: pointer to the address of the page contents
526 * @current_addr: addr of the page
527 * @block: block that contains the page we want to send
528 * @offset: offset inside the block for the page
529 * @last_stage: if we are at the completion stage
531 static int save_xbzrle_page(RAMState
*rs
, uint8_t **current_data
,
532 ram_addr_t current_addr
, RAMBlock
*block
,
533 ram_addr_t offset
, bool last_stage
)
535 int encoded_len
= 0, bytes_xbzrle
;
536 uint8_t *prev_cached_page
;
538 if (!cache_is_cached(XBZRLE
.cache
, current_addr
, rs
->bitmap_sync_count
)) {
539 rs
->xbzrle_cache_miss
++;
541 if (cache_insert(XBZRLE
.cache
, current_addr
, *current_data
,
542 rs
->bitmap_sync_count
) == -1) {
545 /* update *current_data when the page has been
546 inserted into cache */
547 *current_data
= get_cached_data(XBZRLE
.cache
, current_addr
);
553 prev_cached_page
= get_cached_data(XBZRLE
.cache
, current_addr
);
555 /* save current buffer into memory */
556 memcpy(XBZRLE
.current_buf
, *current_data
, TARGET_PAGE_SIZE
);
558 /* XBZRLE encoding (if there is no overflow) */
559 encoded_len
= xbzrle_encode_buffer(prev_cached_page
, XBZRLE
.current_buf
,
560 TARGET_PAGE_SIZE
, XBZRLE
.encoded_buf
,
562 if (encoded_len
== 0) {
563 trace_save_xbzrle_page_skipping();
565 } else if (encoded_len
== -1) {
566 trace_save_xbzrle_page_overflow();
567 rs
->xbzrle_overflows
++;
568 /* update data in the cache */
570 memcpy(prev_cached_page
, *current_data
, TARGET_PAGE_SIZE
);
571 *current_data
= prev_cached_page
;
576 /* we need to update the data in the cache, in order to get the same data */
578 memcpy(prev_cached_page
, XBZRLE
.current_buf
, TARGET_PAGE_SIZE
);
581 /* Send XBZRLE based compressed page */
582 bytes_xbzrle
= save_page_header(rs
, rs
->f
, block
,
583 offset
| RAM_SAVE_FLAG_XBZRLE
);
584 qemu_put_byte(rs
->f
, ENCODING_FLAG_XBZRLE
);
585 qemu_put_be16(rs
->f
, encoded_len
);
586 qemu_put_buffer(rs
->f
, XBZRLE
.encoded_buf
, encoded_len
);
587 bytes_xbzrle
+= encoded_len
+ 1 + 2;
589 rs
->xbzrle_bytes
+= bytes_xbzrle
;
590 rs
->bytes_transferred
+= bytes_xbzrle
;
596 * migration_bitmap_find_dirty: find the next dirty page from start
598 * Called with rcu_read_lock() to protect migration_bitmap
600 * Returns the byte offset within memory region of the start of a dirty page
602 * @rs: current RAM state
603 * @rb: RAMBlock where to search for dirty pages
604 * @start: page where we start the search
607 unsigned long migration_bitmap_find_dirty(RAMState
*rs
, RAMBlock
*rb
,
610 unsigned long size
= rb
->used_length
>> TARGET_PAGE_BITS
;
611 unsigned long *bitmap
= rb
->bmap
;
614 if (rs
->ram_bulk_stage
&& start
> 0) {
617 next
= find_next_bit(bitmap
, size
, start
);
623 static inline bool migration_bitmap_clear_dirty(RAMState
*rs
,
629 ret
= test_and_clear_bit(page
, rb
->bmap
);
632 rs
->migration_dirty_pages
--;
637 static void migration_bitmap_sync_range(RAMState
*rs
, RAMBlock
*rb
,
638 ram_addr_t start
, ram_addr_t length
)
640 rs
->migration_dirty_pages
+=
641 cpu_physical_memory_sync_dirty_bitmap(rb
, start
, length
,
642 &rs
->num_dirty_pages_period
);
646 * ram_pagesize_summary: calculate all the pagesizes of a VM
648 * Returns a summary bitmap of the page sizes of all RAMBlocks
650 * For VMs with just normal pages this is equivalent to the host page
651 * size. If it's got some huge pages then it's the OR of all the
652 * different page sizes.
654 uint64_t ram_pagesize_summary(void)
657 uint64_t summary
= 0;
659 RAMBLOCK_FOREACH(block
) {
660 summary
|= block
->page_size
;
666 static void migration_bitmap_sync(RAMState
*rs
)
670 uint64_t bytes_xfer_now
;
672 rs
->bitmap_sync_count
++;
674 if (!rs
->bytes_xfer_prev
) {
675 rs
->bytes_xfer_prev
= ram_bytes_transferred();
678 if (!rs
->time_last_bitmap_sync
) {
679 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
682 trace_migration_bitmap_sync_start();
683 memory_global_dirty_log_sync();
685 qemu_mutex_lock(&rs
->bitmap_mutex
);
687 RAMBLOCK_FOREACH(block
) {
688 migration_bitmap_sync_range(rs
, block
, 0, block
->used_length
);
691 qemu_mutex_unlock(&rs
->bitmap_mutex
);
693 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
695 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
697 /* more than 1 second = 1000 millisecons */
698 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
699 if (migrate_auto_converge()) {
700 /* The following detection logic can be refined later. For now:
701 Check to see if the dirtied bytes is 50% more than the approx.
702 amount of bytes that just got transferred since the last time we
703 were in this routine. If that happens twice, start or increase
705 bytes_xfer_now
= ram_bytes_transferred();
707 if (rs
->dirty_pages_rate
&&
708 (rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
709 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
710 (rs
->dirty_rate_high_cnt
++ >= 2)) {
711 trace_migration_throttle();
712 rs
->dirty_rate_high_cnt
= 0;
713 mig_throttle_guest_down();
715 rs
->bytes_xfer_prev
= bytes_xfer_now
;
718 if (migrate_use_xbzrle()) {
719 if (rs
->iterations_prev
!= rs
->iterations
) {
720 rs
->xbzrle_cache_miss_rate
=
721 (double)(rs
->xbzrle_cache_miss
-
722 rs
->xbzrle_cache_miss_prev
) /
723 (rs
->iterations
- rs
->iterations_prev
);
725 rs
->iterations_prev
= rs
->iterations
;
726 rs
->xbzrle_cache_miss_prev
= rs
->xbzrle_cache_miss
;
728 rs
->dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
729 / (end_time
- rs
->time_last_bitmap_sync
);
730 rs
->time_last_bitmap_sync
= end_time
;
731 rs
->num_dirty_pages_period
= 0;
733 if (migrate_use_events()) {
734 qapi_event_send_migration_pass(rs
->bitmap_sync_count
, NULL
);
739 * save_zero_page: send the zero page to the stream
741 * Returns the number of pages written.
743 * @rs: current RAM state
744 * @block: block that contains the page we want to send
745 * @offset: offset inside the block for the page
746 * @p: pointer to the page
748 static int save_zero_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
,
753 if (is_zero_range(p
, TARGET_PAGE_SIZE
)) {
755 rs
->bytes_transferred
+=
756 save_page_header(rs
, rs
->f
, block
, offset
| RAM_SAVE_FLAG_ZERO
);
757 qemu_put_byte(rs
->f
, 0);
758 rs
->bytes_transferred
+= 1;
765 static void ram_release_pages(const char *rbname
, uint64_t offset
, int pages
)
767 if (!migrate_release_ram() || !migration_in_postcopy()) {
771 ram_discard_range(rbname
, offset
, pages
<< TARGET_PAGE_BITS
);
775 * ram_save_page: send the given page to the stream
777 * Returns the number of pages written.
779 * >=0 - Number of pages written - this might legally be 0
780 * if xbzrle noticed the page was the same.
782 * @rs: current RAM state
783 * @block: block that contains the page we want to send
784 * @offset: offset inside the block for the page
785 * @last_stage: if we are at the completion stage
787 static int ram_save_page(RAMState
*rs
, PageSearchStatus
*pss
, bool last_stage
)
791 ram_addr_t current_addr
;
794 bool send_async
= true;
795 RAMBlock
*block
= pss
->block
;
796 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
798 p
= block
->host
+ offset
;
799 trace_ram_save_page(block
->idstr
, (uint64_t)offset
, p
);
801 /* In doubt sent page as normal */
803 ret
= ram_control_save_page(rs
->f
, block
->offset
,
804 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
806 rs
->bytes_transferred
+= bytes_xmit
;
812 current_addr
= block
->offset
+ offset
;
814 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
815 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
816 if (bytes_xmit
> 0) {
818 } else if (bytes_xmit
== 0) {
823 pages
= save_zero_page(rs
, block
, offset
, p
);
825 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
826 * page would be stale
828 xbzrle_cache_zero_page(rs
, current_addr
);
829 ram_release_pages(block
->idstr
, offset
, pages
);
830 } else if (!rs
->ram_bulk_stage
&&
831 !migration_in_postcopy() && migrate_use_xbzrle()) {
832 pages
= save_xbzrle_page(rs
, &p
, current_addr
, block
,
835 /* Can't send this cached data async, since the cache page
836 * might get updated before it gets to the wire
843 /* XBZRLE overflow or normal page */
845 rs
->bytes_transferred
+= save_page_header(rs
, rs
->f
, block
,
846 offset
| RAM_SAVE_FLAG_PAGE
);
848 qemu_put_buffer_async(rs
->f
, p
, TARGET_PAGE_SIZE
,
849 migrate_release_ram() &
850 migration_in_postcopy());
852 qemu_put_buffer(rs
->f
, p
, TARGET_PAGE_SIZE
);
854 rs
->bytes_transferred
+= TARGET_PAGE_SIZE
;
859 XBZRLE_cache_unlock();
864 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
867 RAMState
*rs
= &ram_state
;
868 int bytes_sent
, blen
;
869 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
871 bytes_sent
= save_page_header(rs
, f
, block
, offset
|
872 RAM_SAVE_FLAG_COMPRESS_PAGE
);
873 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
874 migrate_compress_level());
877 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
878 error_report("compressed data failed!");
881 ram_release_pages(block
->idstr
, offset
& TARGET_PAGE_MASK
, 1);
887 static void flush_compressed_data(RAMState
*rs
)
889 int idx
, len
, thread_count
;
891 if (!migrate_use_compression()) {
894 thread_count
= migrate_compress_threads();
896 qemu_mutex_lock(&comp_done_lock
);
897 for (idx
= 0; idx
< thread_count
; idx
++) {
898 while (!comp_param
[idx
].done
) {
899 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
902 qemu_mutex_unlock(&comp_done_lock
);
904 for (idx
= 0; idx
< thread_count
; idx
++) {
905 qemu_mutex_lock(&comp_param
[idx
].mutex
);
906 if (!comp_param
[idx
].quit
) {
907 len
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
908 rs
->bytes_transferred
+= len
;
910 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
914 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
917 param
->block
= block
;
918 param
->offset
= offset
;
921 static int compress_page_with_multi_thread(RAMState
*rs
, RAMBlock
*block
,
924 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
926 thread_count
= migrate_compress_threads();
927 qemu_mutex_lock(&comp_done_lock
);
929 for (idx
= 0; idx
< thread_count
; idx
++) {
930 if (comp_param
[idx
].done
) {
931 comp_param
[idx
].done
= false;
932 bytes_xmit
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
933 qemu_mutex_lock(&comp_param
[idx
].mutex
);
934 set_compress_params(&comp_param
[idx
], block
, offset
);
935 qemu_cond_signal(&comp_param
[idx
].cond
);
936 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
939 rs
->bytes_transferred
+= bytes_xmit
;
946 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
949 qemu_mutex_unlock(&comp_done_lock
);
955 * ram_save_compressed_page: compress the given page and send it to the stream
957 * Returns the number of pages written.
959 * @rs: current RAM state
960 * @block: block that contains the page we want to send
961 * @offset: offset inside the block for the page
962 * @last_stage: if we are at the completion stage
964 static int ram_save_compressed_page(RAMState
*rs
, PageSearchStatus
*pss
,
968 uint64_t bytes_xmit
= 0;
971 RAMBlock
*block
= pss
->block
;
972 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
974 p
= block
->host
+ offset
;
976 ret
= ram_control_save_page(rs
->f
, block
->offset
,
977 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
979 rs
->bytes_transferred
+= bytes_xmit
;
982 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
983 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
984 if (bytes_xmit
> 0) {
986 } else if (bytes_xmit
== 0) {
991 /* When starting the process of a new block, the first page of
992 * the block should be sent out before other pages in the same
993 * block, and all the pages in last block should have been sent
994 * out, keeping this order is important, because the 'cont' flag
995 * is used to avoid resending the block name.
997 if (block
!= rs
->last_sent_block
) {
998 flush_compressed_data(rs
);
999 pages
= save_zero_page(rs
, block
, offset
, p
);
1001 /* Make sure the first page is sent out before other pages */
1002 bytes_xmit
= save_page_header(rs
, rs
->f
, block
, offset
|
1003 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1004 blen
= qemu_put_compression_data(rs
->f
, p
, TARGET_PAGE_SIZE
,
1005 migrate_compress_level());
1007 rs
->bytes_transferred
+= bytes_xmit
+ blen
;
1011 qemu_file_set_error(rs
->f
, blen
);
1012 error_report("compressed data failed!");
1016 ram_release_pages(block
->idstr
, offset
, pages
);
1019 pages
= save_zero_page(rs
, block
, offset
, p
);
1021 pages
= compress_page_with_multi_thread(rs
, block
, offset
);
1023 ram_release_pages(block
->idstr
, offset
, pages
);
1032 * find_dirty_block: find the next dirty page and update any state
1033 * associated with the search process.
1035 * Returns if a page is found
1037 * @rs: current RAM state
1038 * @pss: data about the state of the current dirty page scan
1039 * @again: set to false if the search has scanned the whole of RAM
1041 static bool find_dirty_block(RAMState
*rs
, PageSearchStatus
*pss
, bool *again
)
1043 pss
->page
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->page
);
1044 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1045 pss
->page
>= rs
->last_page
) {
1047 * We've been once around the RAM and haven't found anything.
1053 if ((pss
->page
<< TARGET_PAGE_BITS
) >= pss
->block
->used_length
) {
1054 /* Didn't find anything in this RAM Block */
1056 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1058 /* Hit the end of the list */
1059 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1060 /* Flag that we've looped */
1061 pss
->complete_round
= true;
1062 rs
->ram_bulk_stage
= false;
1063 if (migrate_use_xbzrle()) {
1064 /* If xbzrle is on, stop using the data compression at this
1065 * point. In theory, xbzrle can do better than compression.
1067 flush_compressed_data(rs
);
1070 /* Didn't find anything this time, but try again on the new block */
1074 /* Can go around again, but... */
1076 /* We've found something so probably don't need to */
1082 * unqueue_page: gets a page of the queue
1084 * Helper for 'get_queued_page' - gets a page off the queue
1086 * Returns the block of the page (or NULL if none available)
1088 * @rs: current RAM state
1089 * @offset: used to return the offset within the RAMBlock
1091 static RAMBlock
*unqueue_page(RAMState
*rs
, ram_addr_t
*offset
)
1093 RAMBlock
*block
= NULL
;
1095 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1096 if (!QSIMPLEQ_EMPTY(&rs
->src_page_requests
)) {
1097 struct RAMSrcPageRequest
*entry
=
1098 QSIMPLEQ_FIRST(&rs
->src_page_requests
);
1100 *offset
= entry
->offset
;
1102 if (entry
->len
> TARGET_PAGE_SIZE
) {
1103 entry
->len
-= TARGET_PAGE_SIZE
;
1104 entry
->offset
+= TARGET_PAGE_SIZE
;
1106 memory_region_unref(block
->mr
);
1107 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1111 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1117 * get_queued_page: unqueue a page from the postocpy requests
1119 * Skips pages that are already sent (!dirty)
1121 * Returns if a queued page is found
1123 * @rs: current RAM state
1124 * @pss: data about the state of the current dirty page scan
1126 static bool get_queued_page(RAMState
*rs
, PageSearchStatus
*pss
)
1133 block
= unqueue_page(rs
, &offset
);
1135 * We're sending this page, and since it's postcopy nothing else
1136 * will dirty it, and we must make sure it doesn't get sent again
1137 * even if this queue request was received after the background
1138 * search already sent it.
1143 page
= offset
>> TARGET_PAGE_BITS
;
1144 dirty
= test_bit(page
, block
->bmap
);
1146 trace_get_queued_page_not_dirty(block
->idstr
, (uint64_t)offset
,
1147 page
, test_bit(page
, block
->unsentmap
));
1149 trace_get_queued_page(block
->idstr
, (uint64_t)offset
, page
);
1153 } while (block
&& !dirty
);
1157 * As soon as we start servicing pages out of order, then we have
1158 * to kill the bulk stage, since the bulk stage assumes
1159 * in (migration_bitmap_find_and_reset_dirty) that every page is
1160 * dirty, that's no longer true.
1162 rs
->ram_bulk_stage
= false;
1165 * We want the background search to continue from the queued page
1166 * since the guest is likely to want other pages near to the page
1167 * it just requested.
1170 pss
->page
= offset
>> TARGET_PAGE_BITS
;
1177 * migration_page_queue_free: drop any remaining pages in the ram
1180 * It should be empty at the end anyway, but in error cases there may
1181 * be some left. in case that there is any page left, we drop it.
1184 void migration_page_queue_free(void)
1186 struct RAMSrcPageRequest
*mspr
, *next_mspr
;
1187 RAMState
*rs
= &ram_state
;
1188 /* This queue generally should be empty - but in the case of a failed
1189 * migration might have some droppings in.
1192 QSIMPLEQ_FOREACH_SAFE(mspr
, &rs
->src_page_requests
, next_req
, next_mspr
) {
1193 memory_region_unref(mspr
->rb
->mr
);
1194 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1201 * ram_save_queue_pages: queue the page for transmission
1203 * A request from postcopy destination for example.
1205 * Returns zero on success or negative on error
1207 * @rbname: Name of the RAMBLock of the request. NULL means the
1208 * same that last one.
1209 * @start: starting address from the start of the RAMBlock
1210 * @len: length (in bytes) to send
1212 int ram_save_queue_pages(const char *rbname
, ram_addr_t start
, ram_addr_t len
)
1215 RAMState
*rs
= &ram_state
;
1217 rs
->postcopy_requests
++;
1220 /* Reuse last RAMBlock */
1221 ramblock
= rs
->last_req_rb
;
1225 * Shouldn't happen, we can't reuse the last RAMBlock if
1226 * it's the 1st request.
1228 error_report("ram_save_queue_pages no previous block");
1232 ramblock
= qemu_ram_block_by_name(rbname
);
1235 /* We shouldn't be asked for a non-existent RAMBlock */
1236 error_report("ram_save_queue_pages no block '%s'", rbname
);
1239 rs
->last_req_rb
= ramblock
;
1241 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1242 if (start
+len
> ramblock
->used_length
) {
1243 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1244 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1245 __func__
, start
, len
, ramblock
->used_length
);
1249 struct RAMSrcPageRequest
*new_entry
=
1250 g_malloc0(sizeof(struct RAMSrcPageRequest
));
1251 new_entry
->rb
= ramblock
;
1252 new_entry
->offset
= start
;
1253 new_entry
->len
= len
;
1255 memory_region_ref(ramblock
->mr
);
1256 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1257 QSIMPLEQ_INSERT_TAIL(&rs
->src_page_requests
, new_entry
, next_req
);
1258 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1269 * ram_save_target_page: save one target page
1271 * Returns the number of pages written
1273 * @rs: current RAM state
1274 * @ms: current migration state
1275 * @pss: data about the page we want to send
1276 * @last_stage: if we are at the completion stage
1278 static int ram_save_target_page(RAMState
*rs
, PageSearchStatus
*pss
,
1283 /* Check the pages is dirty and if it is send it */
1284 if (migration_bitmap_clear_dirty(rs
, pss
->block
, pss
->page
)) {
1286 * If xbzrle is on, stop using the data compression after first
1287 * round of migration even if compression is enabled. In theory,
1288 * xbzrle can do better than compression.
1290 if (migrate_use_compression() &&
1291 (rs
->ram_bulk_stage
|| !migrate_use_xbzrle())) {
1292 res
= ram_save_compressed_page(rs
, pss
, last_stage
);
1294 res
= ram_save_page(rs
, pss
, last_stage
);
1300 if (pss
->block
->unsentmap
) {
1301 clear_bit(pss
->page
, pss
->block
->unsentmap
);
1309 * ram_save_host_page: save a whole host page
1311 * Starting at *offset send pages up to the end of the current host
1312 * page. It's valid for the initial offset to point into the middle of
1313 * a host page in which case the remainder of the hostpage is sent.
1314 * Only dirty target pages are sent. Note that the host page size may
1315 * be a huge page for this block.
1316 * The saving stops at the boundary of the used_length of the block
1317 * if the RAMBlock isn't a multiple of the host page size.
1319 * Returns the number of pages written or negative on error
1321 * @rs: current RAM state
1322 * @ms: current migration state
1323 * @pss: data about the page we want to send
1324 * @last_stage: if we are at the completion stage
1326 static int ram_save_host_page(RAMState
*rs
, PageSearchStatus
*pss
,
1329 int tmppages
, pages
= 0;
1330 size_t pagesize_bits
=
1331 qemu_ram_pagesize(pss
->block
) >> TARGET_PAGE_BITS
;
1334 tmppages
= ram_save_target_page(rs
, pss
, last_stage
);
1341 } while ((pss
->page
& (pagesize_bits
- 1)) &&
1342 offset_in_ramblock(pss
->block
, pss
->page
<< TARGET_PAGE_BITS
));
1344 /* The offset we leave with is the last one we looked at */
1350 * ram_find_and_save_block: finds a dirty page and sends it to f
1352 * Called within an RCU critical section.
1354 * Returns the number of pages written where zero means no dirty pages
1356 * @rs: current RAM state
1357 * @last_stage: if we are at the completion stage
1359 * On systems where host-page-size > target-page-size it will send all the
1360 * pages in a host page that are dirty.
1363 static int ram_find_and_save_block(RAMState
*rs
, bool last_stage
)
1365 PageSearchStatus pss
;
1369 /* No dirty page as there is zero RAM */
1370 if (!ram_bytes_total()) {
1374 pss
.block
= rs
->last_seen_block
;
1375 pss
.page
= rs
->last_page
;
1376 pss
.complete_round
= false;
1379 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1384 found
= get_queued_page(rs
, &pss
);
1387 /* priority queue empty, so just search for something dirty */
1388 found
= find_dirty_block(rs
, &pss
, &again
);
1392 pages
= ram_save_host_page(rs
, &pss
, last_stage
);
1394 } while (!pages
&& again
);
1396 rs
->last_seen_block
= pss
.block
;
1397 rs
->last_page
= pss
.page
;
1402 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1404 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1405 RAMState
*rs
= &ram_state
;
1408 rs
->zero_pages
+= pages
;
1410 rs
->norm_pages
+= pages
;
1411 rs
->bytes_transferred
+= size
;
1412 qemu_update_position(f
, size
);
1416 uint64_t ram_bytes_total(void)
1422 RAMBLOCK_FOREACH(block
) {
1423 total
+= block
->used_length
;
1429 void free_xbzrle_decoded_buf(void)
1431 g_free(xbzrle_decoded_buf
);
1432 xbzrle_decoded_buf
= NULL
;
1435 static void ram_migration_cleanup(void *opaque
)
1439 /* caller have hold iothread lock or is in a bh, so there is
1440 * no writing race against this migration_bitmap
1442 memory_global_dirty_log_stop();
1444 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1445 g_free(block
->bmap
);
1447 g_free(block
->unsentmap
);
1448 block
->unsentmap
= NULL
;
1451 XBZRLE_cache_lock();
1453 cache_fini(XBZRLE
.cache
);
1454 g_free(XBZRLE
.encoded_buf
);
1455 g_free(XBZRLE
.current_buf
);
1456 g_free(ZERO_TARGET_PAGE
);
1457 XBZRLE
.cache
= NULL
;
1458 XBZRLE
.encoded_buf
= NULL
;
1459 XBZRLE
.current_buf
= NULL
;
1461 XBZRLE_cache_unlock();
1464 static void ram_state_reset(RAMState
*rs
)
1466 rs
->last_seen_block
= NULL
;
1467 rs
->last_sent_block
= NULL
;
1469 rs
->last_version
= ram_list
.version
;
1470 rs
->ram_bulk_stage
= true;
1473 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1476 * 'expected' is the value you expect the bitmap mostly to be full
1477 * of; it won't bother printing lines that are all this value.
1478 * If 'todump' is null the migration bitmap is dumped.
1480 void ram_debug_dump_bitmap(unsigned long *todump
, bool expected
,
1481 unsigned long pages
)
1484 int64_t linelen
= 128;
1487 for (cur
= 0; cur
< pages
; cur
+= linelen
) {
1491 * Last line; catch the case where the line length
1492 * is longer than remaining ram
1494 if (cur
+ linelen
> pages
) {
1495 linelen
= pages
- cur
;
1497 for (curb
= 0; curb
< linelen
; curb
++) {
1498 bool thisbit
= test_bit(cur
+ curb
, todump
);
1499 linebuf
[curb
] = thisbit
? '1' : '.';
1500 found
= found
|| (thisbit
!= expected
);
1503 linebuf
[curb
] = '\0';
1504 fprintf(stderr
, "0x%08" PRIx64
" : %s\n", cur
, linebuf
);
1509 /* **** functions for postcopy ***** */
1511 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1513 struct RAMBlock
*block
;
1515 RAMBLOCK_FOREACH(block
) {
1516 unsigned long *bitmap
= block
->bmap
;
1517 unsigned long range
= block
->used_length
>> TARGET_PAGE_BITS
;
1518 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, 0);
1520 while (run_start
< range
) {
1521 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1522 ram_discard_range(block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1523 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1524 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1530 * postcopy_send_discard_bm_ram: discard a RAMBlock
1532 * Returns zero on success
1534 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1535 * Note: At this point the 'unsentmap' is the processed bitmap combined
1536 * with the dirtymap; so a '1' means it's either dirty or unsent.
1538 * @ms: current migration state
1539 * @pds: state for postcopy
1540 * @start: RAMBlock starting page
1541 * @length: RAMBlock size
1543 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1544 PostcopyDiscardState
*pds
,
1547 unsigned long end
= block
->used_length
>> TARGET_PAGE_BITS
;
1548 unsigned long current
;
1549 unsigned long *unsentmap
= block
->unsentmap
;
1551 for (current
= 0; current
< end
; ) {
1552 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1555 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1556 unsigned long discard_length
;
1559 discard_length
= end
- one
;
1561 discard_length
= zero
- one
;
1563 if (discard_length
) {
1564 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1566 current
= one
+ discard_length
;
1576 * postcopy_each_ram_send_discard: discard all RAMBlocks
1578 * Returns 0 for success or negative for error
1580 * Utility for the outgoing postcopy code.
1581 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1582 * passing it bitmap indexes and name.
1583 * (qemu_ram_foreach_block ends up passing unscaled lengths
1584 * which would mean postcopy code would have to deal with target page)
1586 * @ms: current migration state
1588 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1590 struct RAMBlock
*block
;
1593 RAMBLOCK_FOREACH(block
) {
1594 PostcopyDiscardState
*pds
=
1595 postcopy_discard_send_init(ms
, block
->idstr
);
1598 * Postcopy sends chunks of bitmap over the wire, but it
1599 * just needs indexes at this point, avoids it having
1600 * target page specific code.
1602 ret
= postcopy_send_discard_bm_ram(ms
, pds
, block
);
1603 postcopy_discard_send_finish(ms
, pds
);
1613 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
1615 * Helper for postcopy_chunk_hostpages; it's called twice to
1616 * canonicalize the two bitmaps, that are similar, but one is
1619 * Postcopy requires that all target pages in a hostpage are dirty or
1620 * clean, not a mix. This function canonicalizes the bitmaps.
1622 * @ms: current migration state
1623 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1624 * otherwise we need to canonicalize partially dirty host pages
1625 * @block: block that contains the page we want to canonicalize
1626 * @pds: state for postcopy
1628 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1630 PostcopyDiscardState
*pds
)
1632 RAMState
*rs
= &ram_state
;
1633 unsigned long *bitmap
= block
->bmap
;
1634 unsigned long *unsentmap
= block
->unsentmap
;
1635 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1636 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1637 unsigned long run_start
;
1639 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1640 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1645 /* Find a sent page */
1646 run_start
= find_next_zero_bit(unsentmap
, pages
, 0);
1648 /* Find a dirty page */
1649 run_start
= find_next_bit(bitmap
, pages
, 0);
1652 while (run_start
< pages
) {
1653 bool do_fixup
= false;
1654 unsigned long fixup_start_addr
;
1655 unsigned long host_offset
;
1658 * If the start of this run of pages is in the middle of a host
1659 * page, then we need to fixup this host page.
1661 host_offset
= run_start
% host_ratio
;
1664 run_start
-= host_offset
;
1665 fixup_start_addr
= run_start
;
1666 /* For the next pass */
1667 run_start
= run_start
+ host_ratio
;
1669 /* Find the end of this run */
1670 unsigned long run_end
;
1672 run_end
= find_next_bit(unsentmap
, pages
, run_start
+ 1);
1674 run_end
= find_next_zero_bit(bitmap
, pages
, run_start
+ 1);
1677 * If the end isn't at the start of a host page, then the
1678 * run doesn't finish at the end of a host page
1679 * and we need to discard.
1681 host_offset
= run_end
% host_ratio
;
1684 fixup_start_addr
= run_end
- host_offset
;
1686 * This host page has gone, the next loop iteration starts
1687 * from after the fixup
1689 run_start
= fixup_start_addr
+ host_ratio
;
1692 * No discards on this iteration, next loop starts from
1693 * next sent/dirty page
1695 run_start
= run_end
+ 1;
1702 /* Tell the destination to discard this page */
1703 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1704 /* For the unsent_pass we:
1705 * discard partially sent pages
1706 * For the !unsent_pass (dirty) we:
1707 * discard partially dirty pages that were sent
1708 * (any partially sent pages were already discarded
1709 * by the previous unsent_pass)
1711 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1715 /* Clean up the bitmap */
1716 for (page
= fixup_start_addr
;
1717 page
< fixup_start_addr
+ host_ratio
; page
++) {
1718 /* All pages in this host page are now not sent */
1719 set_bit(page
, unsentmap
);
1722 * Remark them as dirty, updating the count for any pages
1723 * that weren't previously dirty.
1725 rs
->migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1730 /* Find the next sent page for the next iteration */
1731 run_start
= find_next_zero_bit(unsentmap
, pages
, run_start
);
1733 /* Find the next dirty page for the next iteration */
1734 run_start
= find_next_bit(bitmap
, pages
, run_start
);
1740 * postcopy_chuck_hostpages: discrad any partially sent host page
1742 * Utility for the outgoing postcopy code.
1744 * Discard any partially sent host-page size chunks, mark any partially
1745 * dirty host-page size chunks as all dirty. In this case the host-page
1746 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1748 * Returns zero on success
1750 * @ms: current migration state
1751 * @block: block we want to work with
1753 static int postcopy_chunk_hostpages(MigrationState
*ms
, RAMBlock
*block
)
1755 PostcopyDiscardState
*pds
=
1756 postcopy_discard_send_init(ms
, block
->idstr
);
1758 /* First pass: Discard all partially sent host pages */
1759 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1761 * Second pass: Ensure that all partially dirty host pages are made
1764 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1766 postcopy_discard_send_finish(ms
, pds
);
1771 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1773 * Returns zero on success
1775 * Transmit the set of pages to be discarded after precopy to the target
1776 * these are pages that:
1777 * a) Have been previously transmitted but are now dirty again
1778 * b) Pages that have never been transmitted, this ensures that
1779 * any pages on the destination that have been mapped by background
1780 * tasks get discarded (transparent huge pages is the specific concern)
1781 * Hopefully this is pretty sparse
1783 * @ms: current migration state
1785 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1787 RAMState
*rs
= &ram_state
;
1793 /* This should be our last sync, the src is now paused */
1794 migration_bitmap_sync(rs
);
1796 /* Easiest way to make sure we don't resume in the middle of a host-page */
1797 rs
->last_seen_block
= NULL
;
1798 rs
->last_sent_block
= NULL
;
1801 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1802 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1803 unsigned long *bitmap
= block
->bmap
;
1804 unsigned long *unsentmap
= block
->unsentmap
;
1807 /* We don't have a safe way to resize the sentmap, so
1808 * if the bitmap was resized it will be NULL at this
1811 error_report("migration ram resized during precopy phase");
1815 /* Deal with TPS != HPS and huge pages */
1816 ret
= postcopy_chunk_hostpages(ms
, block
);
1823 * Update the unsentmap to be unsentmap = unsentmap | dirty
1825 bitmap_or(unsentmap
, unsentmap
, bitmap
, pages
);
1826 #ifdef DEBUG_POSTCOPY
1827 ram_debug_dump_bitmap(unsentmap
, true, pages
);
1830 trace_ram_postcopy_send_discard_bitmap();
1832 ret
= postcopy_each_ram_send_discard(ms
);
1839 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1841 * Returns zero on success
1843 * @rbname: name of the RAMBlock of the request. NULL means the
1844 * same that last one.
1845 * @start: RAMBlock starting page
1846 * @length: RAMBlock size
1848 int ram_discard_range(const char *rbname
, uint64_t start
, size_t length
)
1852 trace_ram_discard_range(rbname
, start
, length
);
1855 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
1858 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
1862 ret
= ram_block_discard_range(rb
, start
, length
);
1870 static int ram_state_init(RAMState
*rs
)
1872 memset(rs
, 0, sizeof(*rs
));
1873 qemu_mutex_init(&rs
->bitmap_mutex
);
1874 qemu_mutex_init(&rs
->src_page_req_mutex
);
1875 QSIMPLEQ_INIT(&rs
->src_page_requests
);
1877 if (migrate_use_xbzrle()) {
1878 XBZRLE_cache_lock();
1879 ZERO_TARGET_PAGE
= g_malloc0(TARGET_PAGE_SIZE
);
1880 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size() /
1883 if (!XBZRLE
.cache
) {
1884 XBZRLE_cache_unlock();
1885 error_report("Error creating cache");
1888 XBZRLE_cache_unlock();
1890 /* We prefer not to abort if there is no memory */
1891 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
1892 if (!XBZRLE
.encoded_buf
) {
1893 error_report("Error allocating encoded_buf");
1897 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
1898 if (!XBZRLE
.current_buf
) {
1899 error_report("Error allocating current_buf");
1900 g_free(XBZRLE
.encoded_buf
);
1901 XBZRLE
.encoded_buf
= NULL
;
1906 /* For memory_global_dirty_log_start below. */
1907 qemu_mutex_lock_iothread();
1909 qemu_mutex_lock_ramlist();
1911 ram_state_reset(rs
);
1913 /* Skip setting bitmap if there is no RAM */
1914 if (ram_bytes_total()) {
1917 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1918 unsigned long pages
= block
->max_length
>> TARGET_PAGE_BITS
;
1920 block
->bmap
= bitmap_new(pages
);
1921 bitmap_set(block
->bmap
, 0, pages
);
1922 if (migrate_postcopy_ram()) {
1923 block
->unsentmap
= bitmap_new(pages
);
1924 bitmap_set(block
->unsentmap
, 0, pages
);
1930 * Count the total number of pages used by ram blocks not including any
1931 * gaps due to alignment or unplugs.
1933 rs
->migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
1935 memory_global_dirty_log_start();
1936 migration_bitmap_sync(rs
);
1937 qemu_mutex_unlock_ramlist();
1938 qemu_mutex_unlock_iothread();
1945 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1946 * long-running RCU critical section. When rcu-reclaims in the code
1947 * start to become numerous it will be necessary to reduce the
1948 * granularity of these critical sections.
1952 * ram_save_setup: Setup RAM for migration
1954 * Returns zero to indicate success and negative for error
1956 * @f: QEMUFile where to send the data
1957 * @opaque: RAMState pointer
1959 static int ram_save_setup(QEMUFile
*f
, void *opaque
)
1961 RAMState
*rs
= opaque
;
1964 /* migration has already setup the bitmap, reuse it. */
1965 if (!migration_in_colo_state()) {
1966 if (ram_state_init(rs
) < 0) {
1974 qemu_put_be64(f
, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
);
1976 RAMBLOCK_FOREACH(block
) {
1977 qemu_put_byte(f
, strlen(block
->idstr
));
1978 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, strlen(block
->idstr
));
1979 qemu_put_be64(f
, block
->used_length
);
1980 if (migrate_postcopy_ram() && block
->page_size
!= qemu_host_page_size
) {
1981 qemu_put_be64(f
, block
->page_size
);
1987 ram_control_before_iterate(f
, RAM_CONTROL_SETUP
);
1988 ram_control_after_iterate(f
, RAM_CONTROL_SETUP
);
1990 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
1996 * ram_save_iterate: iterative stage for migration
1998 * Returns zero to indicate success and negative for error
2000 * @f: QEMUFile where to send the data
2001 * @opaque: RAMState pointer
2003 static int ram_save_iterate(QEMUFile
*f
, void *opaque
)
2005 RAMState
*rs
= opaque
;
2012 if (ram_list
.version
!= rs
->last_version
) {
2013 ram_state_reset(rs
);
2016 /* Read version before ram_list.blocks */
2019 ram_control_before_iterate(f
, RAM_CONTROL_ROUND
);
2021 t0
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2023 while ((ret
= qemu_file_rate_limit(f
)) == 0) {
2026 pages
= ram_find_and_save_block(rs
, false);
2027 /* no more pages to sent */
2034 /* we want to check in the 1st loop, just in case it was the 1st time
2035 and we had to sync the dirty bitmap.
2036 qemu_get_clock_ns() is a bit expensive, so we only check each some
2039 if ((i
& 63) == 0) {
2040 uint64_t t1
= (qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - t0
) / 1000000;
2041 if (t1
> MAX_WAIT
) {
2042 trace_ram_save_iterate_big_wait(t1
, i
);
2048 flush_compressed_data(rs
);
2052 * Must occur before EOS (or any QEMUFile operation)
2053 * because of RDMA protocol.
2055 ram_control_after_iterate(f
, RAM_CONTROL_ROUND
);
2057 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2058 rs
->bytes_transferred
+= 8;
2060 ret
= qemu_file_get_error(f
);
2069 * ram_save_complete: function called to send the remaining amount of ram
2071 * Returns zero to indicate success
2073 * Called with iothread lock
2075 * @f: QEMUFile where to send the data
2076 * @opaque: RAMState pointer
2078 static int ram_save_complete(QEMUFile
*f
, void *opaque
)
2080 RAMState
*rs
= opaque
;
2084 if (!migration_in_postcopy()) {
2085 migration_bitmap_sync(rs
);
2088 ram_control_before_iterate(f
, RAM_CONTROL_FINISH
);
2090 /* try transferring iterative blocks of memory */
2092 /* flush all remaining blocks regardless of rate limiting */
2096 pages
= ram_find_and_save_block(rs
, !migration_in_colo_state());
2097 /* no more blocks to sent */
2103 flush_compressed_data(rs
);
2104 ram_control_after_iterate(f
, RAM_CONTROL_FINISH
);
2108 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2113 static void ram_save_pending(QEMUFile
*f
, void *opaque
, uint64_t max_size
,
2114 uint64_t *non_postcopiable_pending
,
2115 uint64_t *postcopiable_pending
)
2117 RAMState
*rs
= opaque
;
2118 uint64_t remaining_size
;
2120 remaining_size
= rs
->migration_dirty_pages
* TARGET_PAGE_SIZE
;
2122 if (!migration_in_postcopy() &&
2123 remaining_size
< max_size
) {
2124 qemu_mutex_lock_iothread();
2126 migration_bitmap_sync(rs
);
2128 qemu_mutex_unlock_iothread();
2129 remaining_size
= rs
->migration_dirty_pages
* TARGET_PAGE_SIZE
;
2132 /* We can do postcopy, and all the data is postcopiable */
2133 *postcopiable_pending
+= remaining_size
;
2136 static int load_xbzrle(QEMUFile
*f
, ram_addr_t addr
, void *host
)
2138 unsigned int xh_len
;
2140 uint8_t *loaded_data
;
2142 if (!xbzrle_decoded_buf
) {
2143 xbzrle_decoded_buf
= g_malloc(TARGET_PAGE_SIZE
);
2145 loaded_data
= xbzrle_decoded_buf
;
2147 /* extract RLE header */
2148 xh_flags
= qemu_get_byte(f
);
2149 xh_len
= qemu_get_be16(f
);
2151 if (xh_flags
!= ENCODING_FLAG_XBZRLE
) {
2152 error_report("Failed to load XBZRLE page - wrong compression!");
2156 if (xh_len
> TARGET_PAGE_SIZE
) {
2157 error_report("Failed to load XBZRLE page - len overflow!");
2160 /* load data and decode */
2161 qemu_get_buffer_in_place(f
, &loaded_data
, xh_len
);
2164 if (xbzrle_decode_buffer(loaded_data
, xh_len
, host
,
2165 TARGET_PAGE_SIZE
) == -1) {
2166 error_report("Failed to load XBZRLE page - decode error!");
2174 * ram_block_from_stream: read a RAMBlock id from the migration stream
2176 * Must be called from within a rcu critical section.
2178 * Returns a pointer from within the RCU-protected ram_list.
2180 * @f: QEMUFile where to read the data from
2181 * @flags: Page flags (mostly to see if it's a continuation of previous block)
2183 static inline RAMBlock
*ram_block_from_stream(QEMUFile
*f
, int flags
)
2185 static RAMBlock
*block
= NULL
;
2189 if (flags
& RAM_SAVE_FLAG_CONTINUE
) {
2191 error_report("Ack, bad migration stream!");
2197 len
= qemu_get_byte(f
);
2198 qemu_get_buffer(f
, (uint8_t *)id
, len
);
2201 block
= qemu_ram_block_by_name(id
);
2203 error_report("Can't find block %s", id
);
2210 static inline void *host_from_ram_block_offset(RAMBlock
*block
,
2213 if (!offset_in_ramblock(block
, offset
)) {
2217 return block
->host
+ offset
;
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    /* Skip the memset when the destination already contains zeroes -
     * avoids dirtying (and allocating) pages needlessly. */
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
2237 static void *do_data_decompress(void *opaque
)
2239 DecompressParam
*param
= opaque
;
2240 unsigned long pagesize
;
2244 qemu_mutex_lock(¶m
->mutex
);
2245 while (!param
->quit
) {
2250 qemu_mutex_unlock(¶m
->mutex
);
2252 pagesize
= TARGET_PAGE_SIZE
;
2253 /* uncompress() will return failed in some case, especially
2254 * when the page is dirted when doing the compression, it's
2255 * not a problem because the dirty page will be retransferred
2256 * and uncompress() won't break the data in other pages.
2258 uncompress((Bytef
*)des
, &pagesize
,
2259 (const Bytef
*)param
->compbuf
, len
);
2261 qemu_mutex_lock(&decomp_done_lock
);
2263 qemu_cond_signal(&decomp_done_cond
);
2264 qemu_mutex_unlock(&decomp_done_lock
);
2266 qemu_mutex_lock(¶m
->mutex
);
2268 qemu_cond_wait(¶m
->cond
, ¶m
->mutex
);
2271 qemu_mutex_unlock(¶m
->mutex
);
2276 static void wait_for_decompress_done(void)
2278 int idx
, thread_count
;
2280 if (!migrate_use_compression()) {
2284 thread_count
= migrate_decompress_threads();
2285 qemu_mutex_lock(&decomp_done_lock
);
2286 for (idx
= 0; idx
< thread_count
; idx
++) {
2287 while (!decomp_param
[idx
].done
) {
2288 qemu_cond_wait(&decomp_done_cond
, &decomp_done_lock
);
2291 qemu_mutex_unlock(&decomp_done_lock
);
2294 void migrate_decompress_threads_create(void)
2296 int i
, thread_count
;
2298 thread_count
= migrate_decompress_threads();
2299 decompress_threads
= g_new0(QemuThread
, thread_count
);
2300 decomp_param
= g_new0(DecompressParam
, thread_count
);
2301 qemu_mutex_init(&decomp_done_lock
);
2302 qemu_cond_init(&decomp_done_cond
);
2303 for (i
= 0; i
< thread_count
; i
++) {
2304 qemu_mutex_init(&decomp_param
[i
].mutex
);
2305 qemu_cond_init(&decomp_param
[i
].cond
);
2306 decomp_param
[i
].compbuf
= g_malloc0(compressBound(TARGET_PAGE_SIZE
));
2307 decomp_param
[i
].done
= true;
2308 decomp_param
[i
].quit
= false;
2309 qemu_thread_create(decompress_threads
+ i
, "decompress",
2310 do_data_decompress
, decomp_param
+ i
,
2311 QEMU_THREAD_JOINABLE
);
2315 void migrate_decompress_threads_join(void)
2317 int i
, thread_count
;
2319 thread_count
= migrate_decompress_threads();
2320 for (i
= 0; i
< thread_count
; i
++) {
2321 qemu_mutex_lock(&decomp_param
[i
].mutex
);
2322 decomp_param
[i
].quit
= true;
2323 qemu_cond_signal(&decomp_param
[i
].cond
);
2324 qemu_mutex_unlock(&decomp_param
[i
].mutex
);
2326 for (i
= 0; i
< thread_count
; i
++) {
2327 qemu_thread_join(decompress_threads
+ i
);
2328 qemu_mutex_destroy(&decomp_param
[i
].mutex
);
2329 qemu_cond_destroy(&decomp_param
[i
].cond
);
2330 g_free(decomp_param
[i
].compbuf
);
2332 g_free(decompress_threads
);
2333 g_free(decomp_param
);
2334 decompress_threads
= NULL
;
2335 decomp_param
= NULL
;
2338 static void decompress_data_with_multi_threads(QEMUFile
*f
,
2339 void *host
, int len
)
2341 int idx
, thread_count
;
2343 thread_count
= migrate_decompress_threads();
2344 qemu_mutex_lock(&decomp_done_lock
);
2346 for (idx
= 0; idx
< thread_count
; idx
++) {
2347 if (decomp_param
[idx
].done
) {
2348 decomp_param
[idx
].done
= false;
2349 qemu_mutex_lock(&decomp_param
[idx
].mutex
);
2350 qemu_get_buffer(f
, decomp_param
[idx
].compbuf
, len
);
2351 decomp_param
[idx
].des
= host
;
2352 decomp_param
[idx
].len
= len
;
2353 qemu_cond_signal(&decomp_param
[idx
].cond
);
2354 qemu_mutex_unlock(&decomp_param
[idx
].mutex
);
2358 if (idx
< thread_count
) {
2361 qemu_cond_wait(&decomp_done_cond
, &decomp_done_lock
);
2364 qemu_mutex_unlock(&decomp_done_lock
);
2368 * ram_postcopy_incoming_init: allocate postcopy data structures
2370 * Returns 0 for success and negative if there was one error
2372 * @mis: current migration incoming state
2374 * Allocate data structures etc needed by incoming migration with
2375 * postcopy-ram. postcopy-ram's similarly names
2376 * postcopy_ram_incoming_init does the work.
2378 int ram_postcopy_incoming_init(MigrationIncomingState
*mis
)
2380 unsigned long ram_pages
= last_ram_page();
2382 return postcopy_ram_incoming_init(mis
, ram_pages
);
2386 * ram_load_postcopy: load a page in postcopy case
2388 * Returns 0 for success or -errno in case of error
2390 * Called in postcopy mode by ram_load().
2391 * rcu_read_lock is taken prior to this being called.
2393 * @f: QEMUFile where to send the data
2395 static int ram_load_postcopy(QEMUFile
*f
)
2397 int flags
= 0, ret
= 0;
2398 bool place_needed
= false;
2399 bool matching_page_sizes
= false;
2400 MigrationIncomingState
*mis
= migration_incoming_get_current();
2401 /* Temporary page that is later 'placed' */
2402 void *postcopy_host_page
= postcopy_get_tmp_page(mis
);
2403 void *last_host
= NULL
;
2404 bool all_zero
= false;
2406 while (!ret
&& !(flags
& RAM_SAVE_FLAG_EOS
)) {
2409 void *page_buffer
= NULL
;
2410 void *place_source
= NULL
;
2411 RAMBlock
*block
= NULL
;
2414 addr
= qemu_get_be64(f
);
2415 flags
= addr
& ~TARGET_PAGE_MASK
;
2416 addr
&= TARGET_PAGE_MASK
;
2418 trace_ram_load_postcopy_loop((uint64_t)addr
, flags
);
2419 place_needed
= false;
2420 if (flags
& (RAM_SAVE_FLAG_ZERO
| RAM_SAVE_FLAG_PAGE
)) {
2421 block
= ram_block_from_stream(f
, flags
);
2423 host
= host_from_ram_block_offset(block
, addr
);
2425 error_report("Illegal RAM offset " RAM_ADDR_FMT
, addr
);
2429 matching_page_sizes
= block
->page_size
== TARGET_PAGE_SIZE
;
2431 * Postcopy requires that we place whole host pages atomically;
2432 * these may be huge pages for RAMBlocks that are backed by
2434 * To make it atomic, the data is read into a temporary page
2435 * that's moved into place later.
2436 * The migration protocol uses, possibly smaller, target-pages
2437 * however the source ensures it always sends all the components
2438 * of a host page in order.
2440 page_buffer
= postcopy_host_page
+
2441 ((uintptr_t)host
& (block
->page_size
- 1));
2442 /* If all TP are zero then we can optimise the place */
2443 if (!((uintptr_t)host
& (block
->page_size
- 1))) {
2446 /* not the 1st TP within the HP */
2447 if (host
!= (last_host
+ TARGET_PAGE_SIZE
)) {
2448 error_report("Non-sequential target page %p/%p",
2457 * If it's the last part of a host page then we place the host
2460 place_needed
= (((uintptr_t)host
+ TARGET_PAGE_SIZE
) &
2461 (block
->page_size
- 1)) == 0;
2462 place_source
= postcopy_host_page
;
2466 switch (flags
& ~RAM_SAVE_FLAG_CONTINUE
) {
2467 case RAM_SAVE_FLAG_ZERO
:
2468 ch
= qemu_get_byte(f
);
2469 memset(page_buffer
, ch
, TARGET_PAGE_SIZE
);
2475 case RAM_SAVE_FLAG_PAGE
:
2477 if (!place_needed
|| !matching_page_sizes
) {
2478 qemu_get_buffer(f
, page_buffer
, TARGET_PAGE_SIZE
);
2480 /* Avoids the qemu_file copy during postcopy, which is
2481 * going to do a copy later; can only do it when we
2482 * do this read in one go (matching page sizes)
2484 qemu_get_buffer_in_place(f
, (uint8_t **)&place_source
,
2488 case RAM_SAVE_FLAG_EOS
:
2492 error_report("Unknown combination of migration flags: %#x"
2493 " (postcopy mode)", flags
);
2498 /* This gets called at the last target page in the host page */
2499 void *place_dest
= host
+ TARGET_PAGE_SIZE
- block
->page_size
;
2502 ret
= postcopy_place_page_zero(mis
, place_dest
,
2505 ret
= postcopy_place_page(mis
, place_dest
,
2506 place_source
, block
->page_size
);
2510 ret
= qemu_file_get_error(f
);
2517 static int ram_load(QEMUFile
*f
, void *opaque
, int version_id
)
2519 int flags
= 0, ret
= 0;
2520 static uint64_t seq_iter
;
2523 * If system is running in postcopy mode, page inserts to host memory must
2526 bool postcopy_running
= postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING
;
2527 /* ADVISE is earlier, it shows the source has the postcopy capability on */
2528 bool postcopy_advised
= postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE
;
2532 if (version_id
!= 4) {
2536 /* This RCU critical section can be very long running.
2537 * When RCU reclaims in the code start to become numerous,
2538 * it will be necessary to reduce the granularity of this
2543 if (postcopy_running
) {
2544 ret
= ram_load_postcopy(f
);
2547 while (!postcopy_running
&& !ret
&& !(flags
& RAM_SAVE_FLAG_EOS
)) {
2548 ram_addr_t addr
, total_ram_bytes
;
2552 addr
= qemu_get_be64(f
);
2553 flags
= addr
& ~TARGET_PAGE_MASK
;
2554 addr
&= TARGET_PAGE_MASK
;
2556 if (flags
& (RAM_SAVE_FLAG_ZERO
| RAM_SAVE_FLAG_PAGE
|
2557 RAM_SAVE_FLAG_COMPRESS_PAGE
| RAM_SAVE_FLAG_XBZRLE
)) {
2558 RAMBlock
*block
= ram_block_from_stream(f
, flags
);
2560 host
= host_from_ram_block_offset(block
, addr
);
2562 error_report("Illegal RAM offset " RAM_ADDR_FMT
, addr
);
2566 trace_ram_load_loop(block
->idstr
, (uint64_t)addr
, flags
, host
);
2569 switch (flags
& ~RAM_SAVE_FLAG_CONTINUE
) {
2570 case RAM_SAVE_FLAG_MEM_SIZE
:
2571 /* Synchronize RAM block list */
2572 total_ram_bytes
= addr
;
2573 while (!ret
&& total_ram_bytes
) {
2578 len
= qemu_get_byte(f
);
2579 qemu_get_buffer(f
, (uint8_t *)id
, len
);
2581 length
= qemu_get_be64(f
);
2583 block
= qemu_ram_block_by_name(id
);
2585 if (length
!= block
->used_length
) {
2586 Error
*local_err
= NULL
;
2588 ret
= qemu_ram_resize(block
, length
,
2591 error_report_err(local_err
);
2594 /* For postcopy we need to check hugepage sizes match */
2595 if (postcopy_advised
&&
2596 block
->page_size
!= qemu_host_page_size
) {
2597 uint64_t remote_page_size
= qemu_get_be64(f
);
2598 if (remote_page_size
!= block
->page_size
) {
2599 error_report("Mismatched RAM page size %s "
2600 "(local) %zd != %" PRId64
,
2601 id
, block
->page_size
,
2606 ram_control_load_hook(f
, RAM_CONTROL_BLOCK_REG
,
2609 error_report("Unknown ramblock \"%s\", cannot "
2610 "accept migration", id
);
2614 total_ram_bytes
-= length
;
2618 case RAM_SAVE_FLAG_ZERO
:
2619 ch
= qemu_get_byte(f
);
2620 ram_handle_compressed(host
, ch
, TARGET_PAGE_SIZE
);
2623 case RAM_SAVE_FLAG_PAGE
:
2624 qemu_get_buffer(f
, host
, TARGET_PAGE_SIZE
);
2627 case RAM_SAVE_FLAG_COMPRESS_PAGE
:
2628 len
= qemu_get_be32(f
);
2629 if (len
< 0 || len
> compressBound(TARGET_PAGE_SIZE
)) {
2630 error_report("Invalid compressed data length: %d", len
);
2634 decompress_data_with_multi_threads(f
, host
, len
);
2637 case RAM_SAVE_FLAG_XBZRLE
:
2638 if (load_xbzrle(f
, addr
, host
) < 0) {
2639 error_report("Failed to decompress XBZRLE page at "
2640 RAM_ADDR_FMT
, addr
);
2645 case RAM_SAVE_FLAG_EOS
:
2649 if (flags
& RAM_SAVE_FLAG_HOOK
) {
2650 ram_control_load_hook(f
, RAM_CONTROL_HOOK
, NULL
);
2652 error_report("Unknown combination of migration flags: %#x",
2658 ret
= qemu_file_get_error(f
);
2662 wait_for_decompress_done();
2664 trace_ram_load_complete(ret
, seq_iter
);
2668 static SaveVMHandlers savevm_ram_handlers
= {
2669 .save_live_setup
= ram_save_setup
,
2670 .save_live_iterate
= ram_save_iterate
,
2671 .save_live_complete_postcopy
= ram_save_complete
,
2672 .save_live_complete_precopy
= ram_save_complete
,
2673 .save_live_pending
= ram_save_pending
,
2674 .load_state
= ram_load
,
2675 .cleanup
= ram_migration_cleanup
,
2678 void ram_mig_init(void)
2680 qemu_mutex_init(&XBZRLE
.lock
);
2681 register_savevm_live(NULL
, "ram", 0, 4, &savevm_ram_handlers
, &ram_state
);