4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
8 * Juan Quintela <quintela@redhat.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
39 #include "migration/migration.h"
40 #include "migration/qemu-file.h"
41 #include "migration/vmstate.h"
42 #include "postcopy-ram.h"
43 #include "exec/address-spaces.h"
44 #include "migration/page_cache.h"
45 #include "qemu/error-report.h"
47 #include "exec/ram_addr.h"
48 #include "qemu/rcu_queue.h"
49 #include "migration/colo.h"
51 /***********************************************************/
52 /* ram save/restore */
54 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
55 * worked for pages that where filled with the same char. We switched
56 * it to only search for the zero value. And to avoid confusion with
57 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
60 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
61 #define RAM_SAVE_FLAG_ZERO 0x02
62 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
63 #define RAM_SAVE_FLAG_PAGE 0x08
64 #define RAM_SAVE_FLAG_EOS 0x10
65 #define RAM_SAVE_FLAG_CONTINUE 0x20
66 #define RAM_SAVE_FLAG_XBZRLE 0x40
67 /* 0x80 is reserved in migration.h start with 0x100 next */
68 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
70 static uint8_t *ZERO_TARGET_PAGE
;
/*
 * is_zero_range: check whether a buffer contains only zero bytes
 *
 * @p: start of the buffer
 * @size: length of the buffer in bytes
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    return buffer_is_zero(p, size);
}
77 /* struct contains XBZRLE cache and a static page
78 used by the compression */
80 /* buffer used for XBZRLE encoding */
82 /* buffer for storing page content */
84 /* Cache for XBZRLE, Protected by lock. */
89 /* buffer used for XBZRLE decoding */
90 static uint8_t *xbzrle_decoded_buf
;
92 static void XBZRLE_cache_lock(void)
94 if (migrate_use_xbzrle())
95 qemu_mutex_lock(&XBZRLE
.lock
);
98 static void XBZRLE_cache_unlock(void)
100 if (migrate_use_xbzrle())
101 qemu_mutex_unlock(&XBZRLE
.lock
);
105 * xbzrle_cache_resize: resize the xbzrle cache
107 * This function is called from qmp_migrate_set_cache_size in main
108 * thread, possibly while a migration is in progress. A running
109 * migration may be using the cache and might finish during this call,
110 * hence changes to the cache are protected by XBZRLE.lock().
112 * Returns the new_size or negative in case of error.
114 * @new_size: new cache size
116 int64_t xbzrle_cache_resize(int64_t new_size
)
118 PageCache
*new_cache
;
121 if (new_size
< TARGET_PAGE_SIZE
) {
127 if (XBZRLE
.cache
!= NULL
) {
128 if (pow2floor(new_size
) == migrate_xbzrle_cache_size()) {
131 new_cache
= cache_init(new_size
/ TARGET_PAGE_SIZE
,
134 error_report("Error creating cache");
139 cache_fini(XBZRLE
.cache
);
140 XBZRLE
.cache
= new_cache
;
144 ret
= pow2floor(new_size
);
146 XBZRLE_cache_unlock();
151 * An outstanding page request, on the source, having been received
154 struct RAMSrcPageRequest
{
159 QSIMPLEQ_ENTRY(RAMSrcPageRequest
) next_req
;
162 /* State of RAM for migration */
164 /* QEMUFile used for this migration */
166 /* Last block that we have visited searching for dirty pages */
167 RAMBlock
*last_seen_block
;
168 /* Last block from where we have sent data */
169 RAMBlock
*last_sent_block
;
170 /* Last dirty target page we have sent */
171 ram_addr_t last_page
;
172 /* last ram version we have seen */
173 uint32_t last_version
;
174 /* We are in the first round */
176 /* How many times we have dirty too many pages */
177 int dirty_rate_high_cnt
;
178 /* How many times we have synchronized the bitmap */
179 uint64_t bitmap_sync_count
;
180 /* these variables are used for bitmap sync */
181 /* last time we did a full bitmap_sync */
182 int64_t time_last_bitmap_sync
;
183 /* bytes transferred at start_time */
184 uint64_t bytes_xfer_prev
;
185 /* number of dirty pages since start_time */
186 uint64_t num_dirty_pages_period
;
187 /* xbzrle misses since the beginning of the period */
188 uint64_t xbzrle_cache_miss_prev
;
189 /* number of iterations at the beginning of period */
190 uint64_t iterations_prev
;
191 /* Accounting fields */
192 /* number of zero pages. It used to be pages filled by the same char. */
194 /* number of normal transferred pages */
196 /* Iterations since start */
198 /* xbzrle transmitted bytes. Notice that this is with
199 * compression, they can't be calculated from the pages */
200 uint64_t xbzrle_bytes
;
201 /* xbzrle transmmited pages */
202 uint64_t xbzrle_pages
;
203 /* xbzrle number of cache miss */
204 uint64_t xbzrle_cache_miss
;
205 /* xbzrle miss rate */
206 double xbzrle_cache_miss_rate
;
207 /* xbzrle number of overflows */
208 uint64_t xbzrle_overflows
;
209 /* number of dirty bits in the bitmap */
210 uint64_t migration_dirty_pages
;
211 /* total number of bytes transferred */
212 uint64_t bytes_transferred
;
213 /* number of dirtied pages in the last second */
214 uint64_t dirty_pages_rate
;
215 /* Count of requests incoming from destination */
216 uint64_t postcopy_requests
;
217 /* protects modification of the bitmap */
218 QemuMutex bitmap_mutex
;
219 /* The RAMBlock used in the last src_page_requests */
220 RAMBlock
*last_req_rb
;
221 /* Queue of outstanding page requests from the destination */
222 QemuMutex src_page_req_mutex
;
223 QSIMPLEQ_HEAD(src_page_requests
, RAMSrcPageRequest
) src_page_requests
;
225 typedef struct RAMState RAMState
;
227 static RAMState ram_state
;
229 uint64_t dup_mig_pages_transferred(void)
231 return ram_state
.zero_pages
;
234 uint64_t norm_mig_pages_transferred(void)
236 return ram_state
.norm_pages
;
239 uint64_t xbzrle_mig_bytes_transferred(void)
241 return ram_state
.xbzrle_bytes
;
244 uint64_t xbzrle_mig_pages_transferred(void)
246 return ram_state
.xbzrle_pages
;
249 uint64_t xbzrle_mig_pages_cache_miss(void)
251 return ram_state
.xbzrle_cache_miss
;
254 double xbzrle_mig_cache_miss_rate(void)
256 return ram_state
.xbzrle_cache_miss_rate
;
259 uint64_t xbzrle_mig_pages_overflow(void)
261 return ram_state
.xbzrle_overflows
;
264 uint64_t ram_bytes_transferred(void)
266 return ram_state
.bytes_transferred
;
269 uint64_t ram_bytes_remaining(void)
271 return ram_state
.migration_dirty_pages
* TARGET_PAGE_SIZE
;
274 uint64_t ram_dirty_sync_count(void)
276 return ram_state
.bitmap_sync_count
;
279 uint64_t ram_dirty_pages_rate(void)
281 return ram_state
.dirty_pages_rate
;
284 uint64_t ram_postcopy_requests(void)
286 return ram_state
.postcopy_requests
;
289 /* used by the search for pages to send */
290 struct PageSearchStatus
{
291 /* Current block being searched */
293 /* Current page to search from */
295 /* Set once we wrap around */
298 typedef struct PageSearchStatus PageSearchStatus
;
300 struct CompressParam
{
309 typedef struct CompressParam CompressParam
;
311 struct DecompressParam
{
320 typedef struct DecompressParam DecompressParam
;
322 static CompressParam
*comp_param
;
323 static QemuThread
*compress_threads
;
324 /* comp_done_cond is used to wake up the migration thread when
325 * one of the compression threads has finished the compression.
326 * comp_done_lock is used to co-work with comp_done_cond.
328 static QemuMutex comp_done_lock
;
329 static QemuCond comp_done_cond
;
330 /* The empty QEMUFileOps will be used by file in CompressParam */
331 static const QEMUFileOps empty_ops
= { };
333 static DecompressParam
*decomp_param
;
334 static QemuThread
*decompress_threads
;
335 static QemuMutex decomp_done_lock
;
336 static QemuCond decomp_done_cond
;
338 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
341 static void *do_data_compress(void *opaque
)
343 CompressParam
*param
= opaque
;
347 qemu_mutex_lock(¶m
->mutex
);
348 while (!param
->quit
) {
350 block
= param
->block
;
351 offset
= param
->offset
;
353 qemu_mutex_unlock(¶m
->mutex
);
355 do_compress_ram_page(param
->file
, block
, offset
);
357 qemu_mutex_lock(&comp_done_lock
);
359 qemu_cond_signal(&comp_done_cond
);
360 qemu_mutex_unlock(&comp_done_lock
);
362 qemu_mutex_lock(¶m
->mutex
);
364 qemu_cond_wait(¶m
->cond
, ¶m
->mutex
);
367 qemu_mutex_unlock(¶m
->mutex
);
372 static inline void terminate_compression_threads(void)
374 int idx
, thread_count
;
376 thread_count
= migrate_compress_threads();
378 for (idx
= 0; idx
< thread_count
; idx
++) {
379 qemu_mutex_lock(&comp_param
[idx
].mutex
);
380 comp_param
[idx
].quit
= true;
381 qemu_cond_signal(&comp_param
[idx
].cond
);
382 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
386 void migrate_compress_threads_join(void)
390 if (!migrate_use_compression()) {
393 terminate_compression_threads();
394 thread_count
= migrate_compress_threads();
395 for (i
= 0; i
< thread_count
; i
++) {
396 qemu_thread_join(compress_threads
+ i
);
397 qemu_fclose(comp_param
[i
].file
);
398 qemu_mutex_destroy(&comp_param
[i
].mutex
);
399 qemu_cond_destroy(&comp_param
[i
].cond
);
401 qemu_mutex_destroy(&comp_done_lock
);
402 qemu_cond_destroy(&comp_done_cond
);
403 g_free(compress_threads
);
405 compress_threads
= NULL
;
409 void migrate_compress_threads_create(void)
413 if (!migrate_use_compression()) {
416 thread_count
= migrate_compress_threads();
417 compress_threads
= g_new0(QemuThread
, thread_count
);
418 comp_param
= g_new0(CompressParam
, thread_count
);
419 qemu_cond_init(&comp_done_cond
);
420 qemu_mutex_init(&comp_done_lock
);
421 for (i
= 0; i
< thread_count
; i
++) {
422 /* comp_param[i].file is just used as a dummy buffer to save data,
423 * set its ops to empty.
425 comp_param
[i
].file
= qemu_fopen_ops(NULL
, &empty_ops
);
426 comp_param
[i
].done
= true;
427 comp_param
[i
].quit
= false;
428 qemu_mutex_init(&comp_param
[i
].mutex
);
429 qemu_cond_init(&comp_param
[i
].cond
);
430 qemu_thread_create(compress_threads
+ i
, "compress",
431 do_data_compress
, comp_param
+ i
,
432 QEMU_THREAD_JOINABLE
);
437 * save_page_header: write page header to wire
439 * If this is the 1st block, it also writes the block identification
441 * Returns the number of bytes written
443 * @f: QEMUFile where to send the data
444 * @block: block that contains the page we want to send
445 * @offset: offset inside the block for the page
446 * in the lower bits, it contains flags
448 static size_t save_page_header(RAMState
*rs
, QEMUFile
*f
, RAMBlock
*block
,
453 if (block
== rs
->last_sent_block
) {
454 offset
|= RAM_SAVE_FLAG_CONTINUE
;
456 qemu_put_be64(f
, offset
);
459 if (!(offset
& RAM_SAVE_FLAG_CONTINUE
)) {
460 len
= strlen(block
->idstr
);
461 qemu_put_byte(f
, len
);
462 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, len
);
464 rs
->last_sent_block
= block
;
470 * mig_throttle_guest_down: throotle down the guest
472 * Reduce amount of guest cpu execution to hopefully slow down memory
473 * writes. If guest dirty memory rate is reduced below the rate at
474 * which we can transfer pages to the destination then we should be
475 * able to complete migration. Some workloads dirty memory way too
476 * fast and will not effectively converge, even with auto-converge.
478 static void mig_throttle_guest_down(void)
480 MigrationState
*s
= migrate_get_current();
481 uint64_t pct_initial
= s
->parameters
.cpu_throttle_initial
;
482 uint64_t pct_icrement
= s
->parameters
.cpu_throttle_increment
;
484 /* We have not started throttling yet. Let's start it. */
485 if (!cpu_throttle_active()) {
486 cpu_throttle_set(pct_initial
);
488 /* Throttling already on, just increase the rate */
489 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement
);
494 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
496 * @rs: current RAM state
497 * @current_addr: address for the zero page
499 * Update the xbzrle cache to reflect a page that's been sent as all 0.
500 * The important thing is that a stale (not-yet-0'd) page be replaced
502 * As a bonus, if the page wasn't in the cache it gets added so that
503 * when a small write is made into the 0'd page it gets XBZRLE sent.
505 static void xbzrle_cache_zero_page(RAMState
*rs
, ram_addr_t current_addr
)
507 if (rs
->ram_bulk_stage
|| !migrate_use_xbzrle()) {
511 /* We don't care if this fails to allocate a new cache page
512 * as long as it updated an old one */
513 cache_insert(XBZRLE
.cache
, current_addr
, ZERO_TARGET_PAGE
,
514 rs
->bitmap_sync_count
);
517 #define ENCODING_FLAG_XBZRLE 0x1
520 * save_xbzrle_page: compress and send current page
522 * Returns: 1 means that we wrote the page
523 * 0 means that page is identical to the one already sent
524 * -1 means that xbzrle would be longer than normal
526 * @rs: current RAM state
527 * @current_data: pointer to the address of the page contents
528 * @current_addr: addr of the page
529 * @block: block that contains the page we want to send
530 * @offset: offset inside the block for the page
531 * @last_stage: if we are at the completion stage
533 static int save_xbzrle_page(RAMState
*rs
, uint8_t **current_data
,
534 ram_addr_t current_addr
, RAMBlock
*block
,
535 ram_addr_t offset
, bool last_stage
)
537 int encoded_len
= 0, bytes_xbzrle
;
538 uint8_t *prev_cached_page
;
540 if (!cache_is_cached(XBZRLE
.cache
, current_addr
, rs
->bitmap_sync_count
)) {
541 rs
->xbzrle_cache_miss
++;
543 if (cache_insert(XBZRLE
.cache
, current_addr
, *current_data
,
544 rs
->bitmap_sync_count
) == -1) {
547 /* update *current_data when the page has been
548 inserted into cache */
549 *current_data
= get_cached_data(XBZRLE
.cache
, current_addr
);
555 prev_cached_page
= get_cached_data(XBZRLE
.cache
, current_addr
);
557 /* save current buffer into memory */
558 memcpy(XBZRLE
.current_buf
, *current_data
, TARGET_PAGE_SIZE
);
560 /* XBZRLE encoding (if there is no overflow) */
561 encoded_len
= xbzrle_encode_buffer(prev_cached_page
, XBZRLE
.current_buf
,
562 TARGET_PAGE_SIZE
, XBZRLE
.encoded_buf
,
564 if (encoded_len
== 0) {
565 trace_save_xbzrle_page_skipping();
567 } else if (encoded_len
== -1) {
568 trace_save_xbzrle_page_overflow();
569 rs
->xbzrle_overflows
++;
570 /* update data in the cache */
572 memcpy(prev_cached_page
, *current_data
, TARGET_PAGE_SIZE
);
573 *current_data
= prev_cached_page
;
578 /* we need to update the data in the cache, in order to get the same data */
580 memcpy(prev_cached_page
, XBZRLE
.current_buf
, TARGET_PAGE_SIZE
);
583 /* Send XBZRLE based compressed page */
584 bytes_xbzrle
= save_page_header(rs
, rs
->f
, block
,
585 offset
| RAM_SAVE_FLAG_XBZRLE
);
586 qemu_put_byte(rs
->f
, ENCODING_FLAG_XBZRLE
);
587 qemu_put_be16(rs
->f
, encoded_len
);
588 qemu_put_buffer(rs
->f
, XBZRLE
.encoded_buf
, encoded_len
);
589 bytes_xbzrle
+= encoded_len
+ 1 + 2;
591 rs
->xbzrle_bytes
+= bytes_xbzrle
;
592 rs
->bytes_transferred
+= bytes_xbzrle
;
598 * migration_bitmap_find_dirty: find the next dirty page from start
600 * Called with rcu_read_lock() to protect migration_bitmap
602 * Returns the byte offset within memory region of the start of a dirty page
604 * @rs: current RAM state
605 * @rb: RAMBlock where to search for dirty pages
606 * @start: page where we start the search
609 unsigned long migration_bitmap_find_dirty(RAMState
*rs
, RAMBlock
*rb
,
612 unsigned long size
= rb
->used_length
>> TARGET_PAGE_BITS
;
613 unsigned long *bitmap
= rb
->bmap
;
616 if (rs
->ram_bulk_stage
&& start
> 0) {
619 next
= find_next_bit(bitmap
, size
, start
);
625 static inline bool migration_bitmap_clear_dirty(RAMState
*rs
,
631 ret
= test_and_clear_bit(page
, rb
->bmap
);
634 rs
->migration_dirty_pages
--;
639 static void migration_bitmap_sync_range(RAMState
*rs
, RAMBlock
*rb
,
640 ram_addr_t start
, ram_addr_t length
)
642 rs
->migration_dirty_pages
+=
643 cpu_physical_memory_sync_dirty_bitmap(rb
, start
, length
,
644 &rs
->num_dirty_pages_period
);
648 * ram_pagesize_summary: calculate all the pagesizes of a VM
650 * Returns a summary bitmap of the page sizes of all RAMBlocks
652 * For VMs with just normal pages this is equivalent to the host page
653 * size. If it's got some huge pages then it's the OR of all the
654 * different page sizes.
656 uint64_t ram_pagesize_summary(void)
659 uint64_t summary
= 0;
661 RAMBLOCK_FOREACH(block
) {
662 summary
|= block
->page_size
;
668 static void migration_bitmap_sync(RAMState
*rs
)
672 uint64_t bytes_xfer_now
;
674 rs
->bitmap_sync_count
++;
676 if (!rs
->bytes_xfer_prev
) {
677 rs
->bytes_xfer_prev
= ram_bytes_transferred();
680 if (!rs
->time_last_bitmap_sync
) {
681 rs
->time_last_bitmap_sync
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
684 trace_migration_bitmap_sync_start();
685 memory_global_dirty_log_sync();
687 qemu_mutex_lock(&rs
->bitmap_mutex
);
689 RAMBLOCK_FOREACH(block
) {
690 migration_bitmap_sync_range(rs
, block
, 0, block
->used_length
);
693 qemu_mutex_unlock(&rs
->bitmap_mutex
);
695 trace_migration_bitmap_sync_end(rs
->num_dirty_pages_period
);
697 end_time
= qemu_clock_get_ms(QEMU_CLOCK_REALTIME
);
699 /* more than 1 second = 1000 milliseconds */
700 if (end_time
> rs
->time_last_bitmap_sync
+ 1000) {
701 if (migrate_auto_converge()) {
702 /* The following detection logic can be refined later. For now:
703 Check to see if the dirtied bytes is 50% more than the approx.
704 amount of bytes that just got transferred since the last time we
705 were in this routine. If that happens twice, start or increase
707 bytes_xfer_now
= ram_bytes_transferred();
709 if (rs
->dirty_pages_rate
&&
710 (rs
->num_dirty_pages_period
* TARGET_PAGE_SIZE
>
711 (bytes_xfer_now
- rs
->bytes_xfer_prev
) / 2) &&
712 (rs
->dirty_rate_high_cnt
++ >= 2)) {
713 trace_migration_throttle();
714 rs
->dirty_rate_high_cnt
= 0;
715 mig_throttle_guest_down();
717 rs
->bytes_xfer_prev
= bytes_xfer_now
;
720 if (migrate_use_xbzrle()) {
721 if (rs
->iterations_prev
!= rs
->iterations
) {
722 rs
->xbzrle_cache_miss_rate
=
723 (double)(rs
->xbzrle_cache_miss
-
724 rs
->xbzrle_cache_miss_prev
) /
725 (rs
->iterations
- rs
->iterations_prev
);
727 rs
->iterations_prev
= rs
->iterations
;
728 rs
->xbzrle_cache_miss_prev
= rs
->xbzrle_cache_miss
;
730 rs
->dirty_pages_rate
= rs
->num_dirty_pages_period
* 1000
731 / (end_time
- rs
->time_last_bitmap_sync
);
732 rs
->time_last_bitmap_sync
= end_time
;
733 rs
->num_dirty_pages_period
= 0;
735 if (migrate_use_events()) {
736 qapi_event_send_migration_pass(rs
->bitmap_sync_count
, NULL
);
741 * save_zero_page: send the zero page to the stream
743 * Returns the number of pages written.
745 * @rs: current RAM state
746 * @block: block that contains the page we want to send
747 * @offset: offset inside the block for the page
748 * @p: pointer to the page
750 static int save_zero_page(RAMState
*rs
, RAMBlock
*block
, ram_addr_t offset
,
755 if (is_zero_range(p
, TARGET_PAGE_SIZE
)) {
757 rs
->bytes_transferred
+=
758 save_page_header(rs
, rs
->f
, block
, offset
| RAM_SAVE_FLAG_ZERO
);
759 qemu_put_byte(rs
->f
, 0);
760 rs
->bytes_transferred
+= 1;
767 static void ram_release_pages(const char *rbname
, uint64_t offset
, int pages
)
769 if (!migrate_release_ram() || !migration_in_postcopy()) {
773 ram_discard_range(rbname
, offset
, pages
<< TARGET_PAGE_BITS
);
777 * ram_save_page: send the given page to the stream
779 * Returns the number of pages written.
781 * >=0 - Number of pages written - this might legally be 0
782 * if xbzrle noticed the page was the same.
784 * @rs: current RAM state
785 * @block: block that contains the page we want to send
786 * @offset: offset inside the block for the page
787 * @last_stage: if we are at the completion stage
789 static int ram_save_page(RAMState
*rs
, PageSearchStatus
*pss
, bool last_stage
)
793 ram_addr_t current_addr
;
796 bool send_async
= true;
797 RAMBlock
*block
= pss
->block
;
798 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
800 p
= block
->host
+ offset
;
801 trace_ram_save_page(block
->idstr
, (uint64_t)offset
, p
);
803 /* In doubt send page as normal */
805 ret
= ram_control_save_page(rs
->f
, block
->offset
,
806 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
808 rs
->bytes_transferred
+= bytes_xmit
;
814 current_addr
= block
->offset
+ offset
;
816 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
817 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
818 if (bytes_xmit
> 0) {
820 } else if (bytes_xmit
== 0) {
825 pages
= save_zero_page(rs
, block
, offset
, p
);
827 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
828 * page would be stale
830 xbzrle_cache_zero_page(rs
, current_addr
);
831 ram_release_pages(block
->idstr
, offset
, pages
);
832 } else if (!rs
->ram_bulk_stage
&&
833 !migration_in_postcopy() && migrate_use_xbzrle()) {
834 pages
= save_xbzrle_page(rs
, &p
, current_addr
, block
,
837 /* Can't send this cached data async, since the cache page
838 * might get updated before it gets to the wire
845 /* XBZRLE overflow or normal page */
847 rs
->bytes_transferred
+= save_page_header(rs
, rs
->f
, block
,
848 offset
| RAM_SAVE_FLAG_PAGE
);
850 qemu_put_buffer_async(rs
->f
, p
, TARGET_PAGE_SIZE
,
851 migrate_release_ram() &
852 migration_in_postcopy());
854 qemu_put_buffer(rs
->f
, p
, TARGET_PAGE_SIZE
);
856 rs
->bytes_transferred
+= TARGET_PAGE_SIZE
;
861 XBZRLE_cache_unlock();
866 static int do_compress_ram_page(QEMUFile
*f
, RAMBlock
*block
,
869 RAMState
*rs
= &ram_state
;
870 int bytes_sent
, blen
;
871 uint8_t *p
= block
->host
+ (offset
& TARGET_PAGE_MASK
);
873 bytes_sent
= save_page_header(rs
, f
, block
, offset
|
874 RAM_SAVE_FLAG_COMPRESS_PAGE
);
875 blen
= qemu_put_compression_data(f
, p
, TARGET_PAGE_SIZE
,
876 migrate_compress_level());
879 qemu_file_set_error(migrate_get_current()->to_dst_file
, blen
);
880 error_report("compressed data failed!");
883 ram_release_pages(block
->idstr
, offset
& TARGET_PAGE_MASK
, 1);
889 static void flush_compressed_data(RAMState
*rs
)
891 int idx
, len
, thread_count
;
893 if (!migrate_use_compression()) {
896 thread_count
= migrate_compress_threads();
898 qemu_mutex_lock(&comp_done_lock
);
899 for (idx
= 0; idx
< thread_count
; idx
++) {
900 while (!comp_param
[idx
].done
) {
901 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
904 qemu_mutex_unlock(&comp_done_lock
);
906 for (idx
= 0; idx
< thread_count
; idx
++) {
907 qemu_mutex_lock(&comp_param
[idx
].mutex
);
908 if (!comp_param
[idx
].quit
) {
909 len
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
910 rs
->bytes_transferred
+= len
;
912 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
916 static inline void set_compress_params(CompressParam
*param
, RAMBlock
*block
,
919 param
->block
= block
;
920 param
->offset
= offset
;
923 static int compress_page_with_multi_thread(RAMState
*rs
, RAMBlock
*block
,
926 int idx
, thread_count
, bytes_xmit
= -1, pages
= -1;
928 thread_count
= migrate_compress_threads();
929 qemu_mutex_lock(&comp_done_lock
);
931 for (idx
= 0; idx
< thread_count
; idx
++) {
932 if (comp_param
[idx
].done
) {
933 comp_param
[idx
].done
= false;
934 bytes_xmit
= qemu_put_qemu_file(rs
->f
, comp_param
[idx
].file
);
935 qemu_mutex_lock(&comp_param
[idx
].mutex
);
936 set_compress_params(&comp_param
[idx
], block
, offset
);
937 qemu_cond_signal(&comp_param
[idx
].cond
);
938 qemu_mutex_unlock(&comp_param
[idx
].mutex
);
941 rs
->bytes_transferred
+= bytes_xmit
;
948 qemu_cond_wait(&comp_done_cond
, &comp_done_lock
);
951 qemu_mutex_unlock(&comp_done_lock
);
957 * ram_save_compressed_page: compress the given page and send it to the stream
959 * Returns the number of pages written.
961 * @rs: current RAM state
962 * @block: block that contains the page we want to send
963 * @offset: offset inside the block for the page
964 * @last_stage: if we are at the completion stage
966 static int ram_save_compressed_page(RAMState
*rs
, PageSearchStatus
*pss
,
970 uint64_t bytes_xmit
= 0;
973 RAMBlock
*block
= pss
->block
;
974 ram_addr_t offset
= pss
->page
<< TARGET_PAGE_BITS
;
976 p
= block
->host
+ offset
;
978 ret
= ram_control_save_page(rs
->f
, block
->offset
,
979 offset
, TARGET_PAGE_SIZE
, &bytes_xmit
);
981 rs
->bytes_transferred
+= bytes_xmit
;
984 if (ret
!= RAM_SAVE_CONTROL_NOT_SUPP
) {
985 if (ret
!= RAM_SAVE_CONTROL_DELAYED
) {
986 if (bytes_xmit
> 0) {
988 } else if (bytes_xmit
== 0) {
993 /* When starting the process of a new block, the first page of
994 * the block should be sent out before other pages in the same
995 * block, and all the pages in last block should have been sent
996 * out, keeping this order is important, because the 'cont' flag
997 * is used to avoid resending the block name.
999 if (block
!= rs
->last_sent_block
) {
1000 flush_compressed_data(rs
);
1001 pages
= save_zero_page(rs
, block
, offset
, p
);
1003 /* Make sure the first page is sent out before other pages */
1004 bytes_xmit
= save_page_header(rs
, rs
->f
, block
, offset
|
1005 RAM_SAVE_FLAG_COMPRESS_PAGE
);
1006 blen
= qemu_put_compression_data(rs
->f
, p
, TARGET_PAGE_SIZE
,
1007 migrate_compress_level());
1009 rs
->bytes_transferred
+= bytes_xmit
+ blen
;
1013 qemu_file_set_error(rs
->f
, blen
);
1014 error_report("compressed data failed!");
1018 ram_release_pages(block
->idstr
, offset
, pages
);
1021 pages
= save_zero_page(rs
, block
, offset
, p
);
1023 pages
= compress_page_with_multi_thread(rs
, block
, offset
);
1025 ram_release_pages(block
->idstr
, offset
, pages
);
1034 * find_dirty_block: find the next dirty page and update any state
1035 * associated with the search process.
1037 * Returns if a page is found
1039 * @rs: current RAM state
1040 * @pss: data about the state of the current dirty page scan
1041 * @again: set to false if the search has scanned the whole of RAM
1043 static bool find_dirty_block(RAMState
*rs
, PageSearchStatus
*pss
, bool *again
)
1045 pss
->page
= migration_bitmap_find_dirty(rs
, pss
->block
, pss
->page
);
1046 if (pss
->complete_round
&& pss
->block
== rs
->last_seen_block
&&
1047 pss
->page
>= rs
->last_page
) {
1049 * We've been once around the RAM and haven't found anything.
1055 if ((pss
->page
<< TARGET_PAGE_BITS
) >= pss
->block
->used_length
) {
1056 /* Didn't find anything in this RAM Block */
1058 pss
->block
= QLIST_NEXT_RCU(pss
->block
, next
);
1060 /* Hit the end of the list */
1061 pss
->block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1062 /* Flag that we've looped */
1063 pss
->complete_round
= true;
1064 rs
->ram_bulk_stage
= false;
1065 if (migrate_use_xbzrle()) {
1066 /* If xbzrle is on, stop using the data compression at this
1067 * point. In theory, xbzrle can do better than compression.
1069 flush_compressed_data(rs
);
1072 /* Didn't find anything this time, but try again on the new block */
1076 /* Can go around again, but... */
1078 /* We've found something so probably don't need to */
1084 * unqueue_page: gets a page of the queue
1086 * Helper for 'get_queued_page' - gets a page off the queue
1088 * Returns the block of the page (or NULL if none available)
1090 * @rs: current RAM state
1091 * @offset: used to return the offset within the RAMBlock
1093 static RAMBlock
*unqueue_page(RAMState
*rs
, ram_addr_t
*offset
)
1095 RAMBlock
*block
= NULL
;
1097 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1098 if (!QSIMPLEQ_EMPTY(&rs
->src_page_requests
)) {
1099 struct RAMSrcPageRequest
*entry
=
1100 QSIMPLEQ_FIRST(&rs
->src_page_requests
);
1102 *offset
= entry
->offset
;
1104 if (entry
->len
> TARGET_PAGE_SIZE
) {
1105 entry
->len
-= TARGET_PAGE_SIZE
;
1106 entry
->offset
+= TARGET_PAGE_SIZE
;
1108 memory_region_unref(block
->mr
);
1109 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1113 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1119 * get_queued_page: unqueue a page from the postcopy requests
1121 * Skips pages that are already sent (!dirty)
1123 * Returns if a queued page is found
1125 * @rs: current RAM state
1126 * @pss: data about the state of the current dirty page scan
1128 static bool get_queued_page(RAMState
*rs
, PageSearchStatus
*pss
)
1135 block
= unqueue_page(rs
, &offset
);
1137 * We're sending this page, and since it's postcopy nothing else
1138 * will dirty it, and we must make sure it doesn't get sent again
1139 * even if this queue request was received after the background
1140 * search already sent it.
1145 page
= offset
>> TARGET_PAGE_BITS
;
1146 dirty
= test_bit(page
, block
->bmap
);
1148 trace_get_queued_page_not_dirty(block
->idstr
, (uint64_t)offset
,
1149 page
, test_bit(page
, block
->unsentmap
));
1151 trace_get_queued_page(block
->idstr
, (uint64_t)offset
, page
);
1155 } while (block
&& !dirty
);
1159 * As soon as we start servicing pages out of order, then we have
1160 * to kill the bulk stage, since the bulk stage assumes
1161 * in (migration_bitmap_find_and_reset_dirty) that every page is
1162 * dirty, that's no longer true.
1164 rs
->ram_bulk_stage
= false;
1167 * We want the background search to continue from the queued page
1168 * since the guest is likely to want other pages near to the page
1169 * it just requested.
1172 pss
->page
= offset
>> TARGET_PAGE_BITS
;
1179 * migration_page_queue_free: drop any remaining pages in the ram
1182 * It should be empty at the end anyway, but in error cases there may
1183 * be some left. in case that there is any page left, we drop it.
1186 void migration_page_queue_free(void)
1188 struct RAMSrcPageRequest
*mspr
, *next_mspr
;
1189 RAMState
*rs
= &ram_state
;
1190 /* This queue generally should be empty - but in the case of a failed
1191 * migration might have some droppings in.
1194 QSIMPLEQ_FOREACH_SAFE(mspr
, &rs
->src_page_requests
, next_req
, next_mspr
) {
1195 memory_region_unref(mspr
->rb
->mr
);
1196 QSIMPLEQ_REMOVE_HEAD(&rs
->src_page_requests
, next_req
);
1203 * ram_save_queue_pages: queue the page for transmission
1205 * A request from postcopy destination for example.
1207 * Returns zero on success or negative on error
1209 * @rbname: Name of the RAMBLock of the request. NULL means the
1210 * same that last one.
1211 * @start: starting address from the start of the RAMBlock
1212 * @len: length (in bytes) to send
1214 int ram_save_queue_pages(const char *rbname
, ram_addr_t start
, ram_addr_t len
)
1217 RAMState
*rs
= &ram_state
;
1219 rs
->postcopy_requests
++;
1222 /* Reuse last RAMBlock */
1223 ramblock
= rs
->last_req_rb
;
1227 * Shouldn't happen, we can't reuse the last RAMBlock if
1228 * it's the 1st request.
1230 error_report("ram_save_queue_pages no previous block");
1234 ramblock
= qemu_ram_block_by_name(rbname
);
1237 /* We shouldn't be asked for a non-existent RAMBlock */
1238 error_report("ram_save_queue_pages no block '%s'", rbname
);
1241 rs
->last_req_rb
= ramblock
;
1243 trace_ram_save_queue_pages(ramblock
->idstr
, start
, len
);
1244 if (start
+len
> ramblock
->used_length
) {
1245 error_report("%s request overrun start=" RAM_ADDR_FMT
" len="
1246 RAM_ADDR_FMT
" blocklen=" RAM_ADDR_FMT
,
1247 __func__
, start
, len
, ramblock
->used_length
);
1251 struct RAMSrcPageRequest
*new_entry
=
1252 g_malloc0(sizeof(struct RAMSrcPageRequest
));
1253 new_entry
->rb
= ramblock
;
1254 new_entry
->offset
= start
;
1255 new_entry
->len
= len
;
1257 memory_region_ref(ramblock
->mr
);
1258 qemu_mutex_lock(&rs
->src_page_req_mutex
);
1259 QSIMPLEQ_INSERT_TAIL(&rs
->src_page_requests
, new_entry
, next_req
);
1260 qemu_mutex_unlock(&rs
->src_page_req_mutex
);
1271 * ram_save_target_page: save one target page
1273 * Returns the number of pages written
1275 * @rs: current RAM state
1276 * @ms: current migration state
1277 * @pss: data about the page we want to send
1278 * @last_stage: if we are at the completion stage
1280 static int ram_save_target_page(RAMState
*rs
, PageSearchStatus
*pss
,
1285 /* Check the pages is dirty and if it is send it */
1286 if (migration_bitmap_clear_dirty(rs
, pss
->block
, pss
->page
)) {
1288 * If xbzrle is on, stop using the data compression after first
1289 * round of migration even if compression is enabled. In theory,
1290 * xbzrle can do better than compression.
1292 if (migrate_use_compression() &&
1293 (rs
->ram_bulk_stage
|| !migrate_use_xbzrle())) {
1294 res
= ram_save_compressed_page(rs
, pss
, last_stage
);
1296 res
= ram_save_page(rs
, pss
, last_stage
);
1302 if (pss
->block
->unsentmap
) {
1303 clear_bit(pss
->page
, pss
->block
->unsentmap
);
1311 * ram_save_host_page: save a whole host page
1313 * Starting at *offset send pages up to the end of the current host
1314 * page. It's valid for the initial offset to point into the middle of
1315 * a host page in which case the remainder of the hostpage is sent.
1316 * Only dirty target pages are sent. Note that the host page size may
1317 * be a huge page for this block.
1318 * The saving stops at the boundary of the used_length of the block
1319 * if the RAMBlock isn't a multiple of the host page size.
1321 * Returns the number of pages written or negative on error
1323 * @rs: current RAM state
1324 * @ms: current migration state
1325 * @pss: data about the page we want to send
1326 * @last_stage: if we are at the completion stage
1328 static int ram_save_host_page(RAMState
*rs
, PageSearchStatus
*pss
,
1331 int tmppages
, pages
= 0;
1332 size_t pagesize_bits
=
1333 qemu_ram_pagesize(pss
->block
) >> TARGET_PAGE_BITS
;
1336 tmppages
= ram_save_target_page(rs
, pss
, last_stage
);
1343 } while ((pss
->page
& (pagesize_bits
- 1)) &&
1344 offset_in_ramblock(pss
->block
, pss
->page
<< TARGET_PAGE_BITS
));
1346 /* The offset we leave with is the last one we looked at */
1352 * ram_find_and_save_block: finds a dirty page and sends it to f
1354 * Called within an RCU critical section.
1356 * Returns the number of pages written where zero means no dirty pages
1358 * @rs: current RAM state
1359 * @last_stage: if we are at the completion stage
1361 * On systems where host-page-size > target-page-size it will send all the
1362 * pages in a host page that are dirty.
1365 static int ram_find_and_save_block(RAMState
*rs
, bool last_stage
)
1367 PageSearchStatus pss
;
1371 /* No dirty page as there is zero RAM */
1372 if (!ram_bytes_total()) {
1376 pss
.block
= rs
->last_seen_block
;
1377 pss
.page
= rs
->last_page
;
1378 pss
.complete_round
= false;
1381 pss
.block
= QLIST_FIRST_RCU(&ram_list
.blocks
);
1386 found
= get_queued_page(rs
, &pss
);
1389 /* priority queue empty, so just search for something dirty */
1390 found
= find_dirty_block(rs
, &pss
, &again
);
1394 pages
= ram_save_host_page(rs
, &pss
, last_stage
);
1396 } while (!pages
&& again
);
1398 rs
->last_seen_block
= pss
.block
;
1399 rs
->last_page
= pss
.page
;
1404 void acct_update_position(QEMUFile
*f
, size_t size
, bool zero
)
1406 uint64_t pages
= size
/ TARGET_PAGE_SIZE
;
1407 RAMState
*rs
= &ram_state
;
1410 rs
->zero_pages
+= pages
;
1412 rs
->norm_pages
+= pages
;
1413 rs
->bytes_transferred
+= size
;
1414 qemu_update_position(f
, size
);
1418 uint64_t ram_bytes_total(void)
1424 RAMBLOCK_FOREACH(block
) {
1425 total
+= block
->used_length
;
1431 void free_xbzrle_decoded_buf(void)
1433 g_free(xbzrle_decoded_buf
);
1434 xbzrle_decoded_buf
= NULL
;
1437 static void ram_migration_cleanup(void *opaque
)
1441 /* caller have hold iothread lock or is in a bh, so there is
1442 * no writing race against this migration_bitmap
1444 memory_global_dirty_log_stop();
1446 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1447 g_free(block
->bmap
);
1449 g_free(block
->unsentmap
);
1450 block
->unsentmap
= NULL
;
1453 XBZRLE_cache_lock();
1455 cache_fini(XBZRLE
.cache
);
1456 g_free(XBZRLE
.encoded_buf
);
1457 g_free(XBZRLE
.current_buf
);
1458 g_free(ZERO_TARGET_PAGE
);
1459 XBZRLE
.cache
= NULL
;
1460 XBZRLE
.encoded_buf
= NULL
;
1461 XBZRLE
.current_buf
= NULL
;
1463 XBZRLE_cache_unlock();
1466 static void ram_state_reset(RAMState
*rs
)
1468 rs
->last_seen_block
= NULL
;
1469 rs
->last_sent_block
= NULL
;
1471 rs
->last_version
= ram_list
.version
;
1472 rs
->ram_bulk_stage
= true;
1475 #define MAX_WAIT 50 /* ms, half buffered_file limit */
/*
 * Dump a bitmap to stderr, 128 bits per line, for debugging.
 *
 * 'expected' is the value you expect the bitmap mostly to be full
 * of; it won't bother printing lines that are all this value.
 * If 'todump' is null the migration bitmap is dumped.
 */
void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
                           unsigned long pages)
{
    int64_t cur;
    int64_t linelen = 128;
    char linebuf[129];

    for (cur = 0; cur < pages; cur += linelen) {
        int64_t curb;
        bool found = false;
        /*
         * Last line; catch the case where the line length
         * is longer than remaining ram
         */
        if (cur + linelen > pages) {
            linelen = pages - cur;
        }
        for (curb = 0; curb < linelen; curb++) {
            bool thisbit = test_bit(cur + curb, todump);
            linebuf[curb] = thisbit ? '1' : '.';
            found = found || (thisbit != expected);
        }
        if (found) {
            linebuf[curb] = '\0';
            fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
        }
    }
}
1511 /* **** functions for postcopy ***** */
1513 void ram_postcopy_migrated_memory_release(MigrationState
*ms
)
1515 struct RAMBlock
*block
;
1517 RAMBLOCK_FOREACH(block
) {
1518 unsigned long *bitmap
= block
->bmap
;
1519 unsigned long range
= block
->used_length
>> TARGET_PAGE_BITS
;
1520 unsigned long run_start
= find_next_zero_bit(bitmap
, range
, 0);
1522 while (run_start
< range
) {
1523 unsigned long run_end
= find_next_bit(bitmap
, range
, run_start
+ 1);
1524 ram_discard_range(block
->idstr
, run_start
<< TARGET_PAGE_BITS
,
1525 (run_end
- run_start
) << TARGET_PAGE_BITS
);
1526 run_start
= find_next_zero_bit(bitmap
, range
, run_end
+ 1);
1532 * postcopy_send_discard_bm_ram: discard a RAMBlock
1534 * Returns zero on success
1536 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1537 * Note: At this point the 'unsentmap' is the processed bitmap combined
1538 * with the dirtymap; so a '1' means it's either dirty or unsent.
1540 * @ms: current migration state
1541 * @pds: state for postcopy
1542 * @start: RAMBlock starting page
1543 * @length: RAMBlock size
1545 static int postcopy_send_discard_bm_ram(MigrationState
*ms
,
1546 PostcopyDiscardState
*pds
,
1549 unsigned long end
= block
->used_length
>> TARGET_PAGE_BITS
;
1550 unsigned long current
;
1551 unsigned long *unsentmap
= block
->unsentmap
;
1553 for (current
= 0; current
< end
; ) {
1554 unsigned long one
= find_next_bit(unsentmap
, end
, current
);
1557 unsigned long zero
= find_next_zero_bit(unsentmap
, end
, one
+ 1);
1558 unsigned long discard_length
;
1561 discard_length
= end
- one
;
1563 discard_length
= zero
- one
;
1565 if (discard_length
) {
1566 postcopy_discard_send_range(ms
, pds
, one
, discard_length
);
1568 current
= one
+ discard_length
;
1578 * postcopy_each_ram_send_discard: discard all RAMBlocks
1580 * Returns 0 for success or negative for error
1582 * Utility for the outgoing postcopy code.
1583 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1584 * passing it bitmap indexes and name.
1585 * (qemu_ram_foreach_block ends up passing unscaled lengths
1586 * which would mean postcopy code would have to deal with target page)
1588 * @ms: current migration state
1590 static int postcopy_each_ram_send_discard(MigrationState
*ms
)
1592 struct RAMBlock
*block
;
1595 RAMBLOCK_FOREACH(block
) {
1596 PostcopyDiscardState
*pds
=
1597 postcopy_discard_send_init(ms
, block
->idstr
);
1600 * Postcopy sends chunks of bitmap over the wire, but it
1601 * just needs indexes at this point, avoids it having
1602 * target page specific code.
1604 ret
= postcopy_send_discard_bm_ram(ms
, pds
, block
);
1605 postcopy_discard_send_finish(ms
, pds
);
1615 * postcopy_chunk_hostpages_pass: canocalize bitmap in hostpages
1617 * Helper for postcopy_chunk_hostpages; it's called twice to
1618 * canonicalize the two bitmaps, that are similar, but one is
1621 * Postcopy requires that all target pages in a hostpage are dirty or
1622 * clean, not a mix. This function canonicalizes the bitmaps.
1624 * @ms: current migration state
1625 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1626 * otherwise we need to canonicalize partially dirty host pages
1627 * @block: block that contains the page we want to canonicalize
1628 * @pds: state for postcopy
1630 static void postcopy_chunk_hostpages_pass(MigrationState
*ms
, bool unsent_pass
,
1632 PostcopyDiscardState
*pds
)
1634 RAMState
*rs
= &ram_state
;
1635 unsigned long *bitmap
= block
->bmap
;
1636 unsigned long *unsentmap
= block
->unsentmap
;
1637 unsigned int host_ratio
= block
->page_size
/ TARGET_PAGE_SIZE
;
1638 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1639 unsigned long run_start
;
1641 if (block
->page_size
== TARGET_PAGE_SIZE
) {
1642 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1647 /* Find a sent page */
1648 run_start
= find_next_zero_bit(unsentmap
, pages
, 0);
1650 /* Find a dirty page */
1651 run_start
= find_next_bit(bitmap
, pages
, 0);
1654 while (run_start
< pages
) {
1655 bool do_fixup
= false;
1656 unsigned long fixup_start_addr
;
1657 unsigned long host_offset
;
1660 * If the start of this run of pages is in the middle of a host
1661 * page, then we need to fixup this host page.
1663 host_offset
= run_start
% host_ratio
;
1666 run_start
-= host_offset
;
1667 fixup_start_addr
= run_start
;
1668 /* For the next pass */
1669 run_start
= run_start
+ host_ratio
;
1671 /* Find the end of this run */
1672 unsigned long run_end
;
1674 run_end
= find_next_bit(unsentmap
, pages
, run_start
+ 1);
1676 run_end
= find_next_zero_bit(bitmap
, pages
, run_start
+ 1);
1679 * If the end isn't at the start of a host page, then the
1680 * run doesn't finish at the end of a host page
1681 * and we need to discard.
1683 host_offset
= run_end
% host_ratio
;
1686 fixup_start_addr
= run_end
- host_offset
;
1688 * This host page has gone, the next loop iteration starts
1689 * from after the fixup
1691 run_start
= fixup_start_addr
+ host_ratio
;
1694 * No discards on this iteration, next loop starts from
1695 * next sent/dirty page
1697 run_start
= run_end
+ 1;
1704 /* Tell the destination to discard this page */
1705 if (unsent_pass
|| !test_bit(fixup_start_addr
, unsentmap
)) {
1706 /* For the unsent_pass we:
1707 * discard partially sent pages
1708 * For the !unsent_pass (dirty) we:
1709 * discard partially dirty pages that were sent
1710 * (any partially sent pages were already discarded
1711 * by the previous unsent_pass)
1713 postcopy_discard_send_range(ms
, pds
, fixup_start_addr
,
1717 /* Clean up the bitmap */
1718 for (page
= fixup_start_addr
;
1719 page
< fixup_start_addr
+ host_ratio
; page
++) {
1720 /* All pages in this host page are now not sent */
1721 set_bit(page
, unsentmap
);
1724 * Remark them as dirty, updating the count for any pages
1725 * that weren't previously dirty.
1727 rs
->migration_dirty_pages
+= !test_and_set_bit(page
, bitmap
);
1732 /* Find the next sent page for the next iteration */
1733 run_start
= find_next_zero_bit(unsentmap
, pages
, run_start
);
1735 /* Find the next dirty page for the next iteration */
1736 run_start
= find_next_bit(bitmap
, pages
, run_start
);
1742 * postcopy_chuck_hostpages: discrad any partially sent host page
1744 * Utility for the outgoing postcopy code.
1746 * Discard any partially sent host-page size chunks, mark any partially
1747 * dirty host-page size chunks as all dirty. In this case the host-page
1748 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1750 * Returns zero on success
1752 * @ms: current migration state
1753 * @block: block we want to work with
1755 static int postcopy_chunk_hostpages(MigrationState
*ms
, RAMBlock
*block
)
1757 PostcopyDiscardState
*pds
=
1758 postcopy_discard_send_init(ms
, block
->idstr
);
1760 /* First pass: Discard all partially sent host pages */
1761 postcopy_chunk_hostpages_pass(ms
, true, block
, pds
);
1763 * Second pass: Ensure that all partially dirty host pages are made
1766 postcopy_chunk_hostpages_pass(ms
, false, block
, pds
);
1768 postcopy_discard_send_finish(ms
, pds
);
1773 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1775 * Returns zero on success
1777 * Transmit the set of pages to be discarded after precopy to the target
1778 * these are pages that:
1779 * a) Have been previously transmitted but are now dirty again
1780 * b) Pages that have never been transmitted, this ensures that
1781 * any pages on the destination that have been mapped by background
1782 * tasks get discarded (transparent huge pages is the specific concern)
1783 * Hopefully this is pretty sparse
1785 * @ms: current migration state
1787 int ram_postcopy_send_discard_bitmap(MigrationState
*ms
)
1789 RAMState
*rs
= &ram_state
;
1795 /* This should be our last sync, the src is now paused */
1796 migration_bitmap_sync(rs
);
1798 /* Easiest way to make sure we don't resume in the middle of a host-page */
1799 rs
->last_seen_block
= NULL
;
1800 rs
->last_sent_block
= NULL
;
1803 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1804 unsigned long pages
= block
->used_length
>> TARGET_PAGE_BITS
;
1805 unsigned long *bitmap
= block
->bmap
;
1806 unsigned long *unsentmap
= block
->unsentmap
;
1809 /* We don't have a safe way to resize the sentmap, so
1810 * if the bitmap was resized it will be NULL at this
1813 error_report("migration ram resized during precopy phase");
1817 /* Deal with TPS != HPS and huge pages */
1818 ret
= postcopy_chunk_hostpages(ms
, block
);
1825 * Update the unsentmap to be unsentmap = unsentmap | dirty
1827 bitmap_or(unsentmap
, unsentmap
, bitmap
, pages
);
1828 #ifdef DEBUG_POSTCOPY
1829 ram_debug_dump_bitmap(unsentmap
, true, pages
);
1832 trace_ram_postcopy_send_discard_bitmap();
1834 ret
= postcopy_each_ram_send_discard(ms
);
1841 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1843 * Returns zero on success
1845 * @rbname: name of the RAMBlock of the request. NULL means the
1846 * same that last one.
1847 * @start: RAMBlock starting page
1848 * @length: RAMBlock size
1850 int ram_discard_range(const char *rbname
, uint64_t start
, size_t length
)
1854 trace_ram_discard_range(rbname
, start
, length
);
1857 RAMBlock
*rb
= qemu_ram_block_by_name(rbname
);
1860 error_report("ram_discard_range: Failed to find block '%s'", rbname
);
1864 ret
= ram_block_discard_range(rb
, start
, length
);
1872 static int ram_state_init(RAMState
*rs
)
1874 memset(rs
, 0, sizeof(*rs
));
1875 qemu_mutex_init(&rs
->bitmap_mutex
);
1876 qemu_mutex_init(&rs
->src_page_req_mutex
);
1877 QSIMPLEQ_INIT(&rs
->src_page_requests
);
1879 if (migrate_use_xbzrle()) {
1880 XBZRLE_cache_lock();
1881 ZERO_TARGET_PAGE
= g_malloc0(TARGET_PAGE_SIZE
);
1882 XBZRLE
.cache
= cache_init(migrate_xbzrle_cache_size() /
1885 if (!XBZRLE
.cache
) {
1886 XBZRLE_cache_unlock();
1887 error_report("Error creating cache");
1890 XBZRLE_cache_unlock();
1892 /* We prefer not to abort if there is no memory */
1893 XBZRLE
.encoded_buf
= g_try_malloc0(TARGET_PAGE_SIZE
);
1894 if (!XBZRLE
.encoded_buf
) {
1895 error_report("Error allocating encoded_buf");
1899 XBZRLE
.current_buf
= g_try_malloc(TARGET_PAGE_SIZE
);
1900 if (!XBZRLE
.current_buf
) {
1901 error_report("Error allocating current_buf");
1902 g_free(XBZRLE
.encoded_buf
);
1903 XBZRLE
.encoded_buf
= NULL
;
1908 /* For memory_global_dirty_log_start below. */
1909 qemu_mutex_lock_iothread();
1911 qemu_mutex_lock_ramlist();
1913 ram_state_reset(rs
);
1915 /* Skip setting bitmap if there is no RAM */
1916 if (ram_bytes_total()) {
1919 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1920 unsigned long pages
= block
->max_length
>> TARGET_PAGE_BITS
;
1922 block
->bmap
= bitmap_new(pages
);
1923 bitmap_set(block
->bmap
, 0, pages
);
1924 if (migrate_postcopy_ram()) {
1925 block
->unsentmap
= bitmap_new(pages
);
1926 bitmap_set(block
->unsentmap
, 0, pages
);
1932 * Count the total number of pages used by ram blocks not including any
1933 * gaps due to alignment or unplugs.
1935 rs
->migration_dirty_pages
= ram_bytes_total() >> TARGET_PAGE_BITS
;
1937 memory_global_dirty_log_start();
1938 migration_bitmap_sync(rs
);
1939 qemu_mutex_unlock_ramlist();
1940 qemu_mutex_unlock_iothread();
1947 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
1948 * long-running RCU critical section. When rcu-reclaims in the code
1949 * start to become numerous it will be necessary to reduce the
1950 * granularity of these critical sections.
1954 * ram_save_setup: Setup RAM for migration
1956 * Returns zero to indicate success and negative for error
1958 * @f: QEMUFile where to send the data
1959 * @opaque: RAMState pointer
1961 static int ram_save_setup(QEMUFile
*f
, void *opaque
)
1963 RAMState
*rs
= opaque
;
1966 /* migration has already setup the bitmap, reuse it. */
1967 if (!migration_in_colo_state()) {
1968 if (ram_state_init(rs
) < 0) {
1976 qemu_put_be64(f
, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
);
1978 RAMBLOCK_FOREACH(block
) {
1979 qemu_put_byte(f
, strlen(block
->idstr
));
1980 qemu_put_buffer(f
, (uint8_t *)block
->idstr
, strlen(block
->idstr
));
1981 qemu_put_be64(f
, block
->used_length
);
1982 if (migrate_postcopy_ram() && block
->page_size
!= qemu_host_page_size
) {
1983 qemu_put_be64(f
, block
->page_size
);
1989 ram_control_before_iterate(f
, RAM_CONTROL_SETUP
);
1990 ram_control_after_iterate(f
, RAM_CONTROL_SETUP
);
1992 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
1998 * ram_save_iterate: iterative stage for migration
2000 * Returns zero to indicate success and negative for error
2002 * @f: QEMUFile where to send the data
2003 * @opaque: RAMState pointer
2005 static int ram_save_iterate(QEMUFile
*f
, void *opaque
)
2007 RAMState
*rs
= opaque
;
2014 if (ram_list
.version
!= rs
->last_version
) {
2015 ram_state_reset(rs
);
2018 /* Read version before ram_list.blocks */
2021 ram_control_before_iterate(f
, RAM_CONTROL_ROUND
);
2023 t0
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2025 while ((ret
= qemu_file_rate_limit(f
)) == 0) {
2028 pages
= ram_find_and_save_block(rs
, false);
2029 /* no more pages to sent */
2036 /* we want to check in the 1st loop, just in case it was the 1st time
2037 and we had to sync the dirty bitmap.
2038 qemu_get_clock_ns() is a bit expensive, so we only check each some
2041 if ((i
& 63) == 0) {
2042 uint64_t t1
= (qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - t0
) / 1000000;
2043 if (t1
> MAX_WAIT
) {
2044 trace_ram_save_iterate_big_wait(t1
, i
);
2050 flush_compressed_data(rs
);
2054 * Must occur before EOS (or any QEMUFile operation)
2055 * because of RDMA protocol.
2057 ram_control_after_iterate(f
, RAM_CONTROL_ROUND
);
2059 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2060 rs
->bytes_transferred
+= 8;
2062 ret
= qemu_file_get_error(f
);
2071 * ram_save_complete: function called to send the remaining amount of ram
2073 * Returns zero to indicate success
2075 * Called with iothread lock
2077 * @f: QEMUFile where to send the data
2078 * @opaque: RAMState pointer
2080 static int ram_save_complete(QEMUFile
*f
, void *opaque
)
2082 RAMState
*rs
= opaque
;
2086 if (!migration_in_postcopy()) {
2087 migration_bitmap_sync(rs
);
2090 ram_control_before_iterate(f
, RAM_CONTROL_FINISH
);
2092 /* try transferring iterative blocks of memory */
2094 /* flush all remaining blocks regardless of rate limiting */
2098 pages
= ram_find_and_save_block(rs
, !migration_in_colo_state());
2099 /* no more blocks to sent */
2105 flush_compressed_data(rs
);
2106 ram_control_after_iterate(f
, RAM_CONTROL_FINISH
);
2110 qemu_put_be64(f
, RAM_SAVE_FLAG_EOS
);
2115 static void ram_save_pending(QEMUFile
*f
, void *opaque
, uint64_t max_size
,
2116 uint64_t *non_postcopiable_pending
,
2117 uint64_t *postcopiable_pending
)
2119 RAMState
*rs
= opaque
;
2120 uint64_t remaining_size
;
2122 remaining_size
= rs
->migration_dirty_pages
* TARGET_PAGE_SIZE
;
2124 if (!migration_in_postcopy() &&
2125 remaining_size
< max_size
) {
2126 qemu_mutex_lock_iothread();
2128 migration_bitmap_sync(rs
);
2130 qemu_mutex_unlock_iothread();
2131 remaining_size
= rs
->migration_dirty_pages
* TARGET_PAGE_SIZE
;
2134 /* We can do postcopy, and all the data is postcopiable */
2135 *postcopiable_pending
+= remaining_size
;
2138 static int load_xbzrle(QEMUFile
*f
, ram_addr_t addr
, void *host
)
2140 unsigned int xh_len
;
2142 uint8_t *loaded_data
;
2144 if (!xbzrle_decoded_buf
) {
2145 xbzrle_decoded_buf
= g_malloc(TARGET_PAGE_SIZE
);
2147 loaded_data
= xbzrle_decoded_buf
;
2149 /* extract RLE header */
2150 xh_flags
= qemu_get_byte(f
);
2151 xh_len
= qemu_get_be16(f
);
2153 if (xh_flags
!= ENCODING_FLAG_XBZRLE
) {
2154 error_report("Failed to load XBZRLE page - wrong compression!");
2158 if (xh_len
> TARGET_PAGE_SIZE
) {
2159 error_report("Failed to load XBZRLE page - len overflow!");
2162 /* load data and decode */
2163 qemu_get_buffer_in_place(f
, &loaded_data
, xh_len
);
2166 if (xbzrle_decode_buffer(loaded_data
, xh_len
, host
,
2167 TARGET_PAGE_SIZE
) == -1) {
2168 error_report("Failed to load XBZRLE page - decode error!");
2176 * ram_block_from_stream: read a RAMBlock id from the migration stream
2178 * Must be called from within a rcu critical section.
2180 * Returns a pointer from within the RCU-protected ram_list.
2182 * @f: QEMUFile where to read the data from
2183 * @flags: Page flags (mostly to see if it's a continuation of previous block)
2185 static inline RAMBlock
*ram_block_from_stream(QEMUFile
*f
, int flags
)
2187 static RAMBlock
*block
= NULL
;
2191 if (flags
& RAM_SAVE_FLAG_CONTINUE
) {
2193 error_report("Ack, bad migration stream!");
2199 len
= qemu_get_byte(f
);
2200 qemu_get_buffer(f
, (uint8_t *)id
, len
);
2203 block
= qemu_ram_block_by_name(id
);
2205 error_report("Can't find block %s", id
);
2212 static inline void *host_from_ram_block_offset(RAMBlock
*block
,
2215 if (!offset_in_ramblock(block
, offset
)) {
2219 return block
->host
+ offset
;
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    /* Skip the memset when the destination is already all-zero. */
    if (ch != 0 || !is_zero_range(host, size)) {
        memset(host, ch, size);
    }
}
2239 static void *do_data_decompress(void *opaque
)
2241 DecompressParam
*param
= opaque
;
2242 unsigned long pagesize
;
2246 qemu_mutex_lock(¶m
->mutex
);
2247 while (!param
->quit
) {
2252 qemu_mutex_unlock(¶m
->mutex
);
2254 pagesize
= TARGET_PAGE_SIZE
;
2255 /* uncompress() will return failed in some case, especially
2256 * when the page is dirted when doing the compression, it's
2257 * not a problem because the dirty page will be retransferred
2258 * and uncompress() won't break the data in other pages.
2260 uncompress((Bytef
*)des
, &pagesize
,
2261 (const Bytef
*)param
->compbuf
, len
);
2263 qemu_mutex_lock(&decomp_done_lock
);
2265 qemu_cond_signal(&decomp_done_cond
);
2266 qemu_mutex_unlock(&decomp_done_lock
);
2268 qemu_mutex_lock(¶m
->mutex
);
2270 qemu_cond_wait(¶m
->cond
, ¶m
->mutex
);
2273 qemu_mutex_unlock(¶m
->mutex
);
2278 static void wait_for_decompress_done(void)
2280 int idx
, thread_count
;
2282 if (!migrate_use_compression()) {
2286 thread_count
= migrate_decompress_threads();
2287 qemu_mutex_lock(&decomp_done_lock
);
2288 for (idx
= 0; idx
< thread_count
; idx
++) {
2289 while (!decomp_param
[idx
].done
) {
2290 qemu_cond_wait(&decomp_done_cond
, &decomp_done_lock
);
2293 qemu_mutex_unlock(&decomp_done_lock
);
2296 void migrate_decompress_threads_create(void)
2298 int i
, thread_count
;
2300 thread_count
= migrate_decompress_threads();
2301 decompress_threads
= g_new0(QemuThread
, thread_count
);
2302 decomp_param
= g_new0(DecompressParam
, thread_count
);
2303 qemu_mutex_init(&decomp_done_lock
);
2304 qemu_cond_init(&decomp_done_cond
);
2305 for (i
= 0; i
< thread_count
; i
++) {
2306 qemu_mutex_init(&decomp_param
[i
].mutex
);
2307 qemu_cond_init(&decomp_param
[i
].cond
);
2308 decomp_param
[i
].compbuf
= g_malloc0(compressBound(TARGET_PAGE_SIZE
));
2309 decomp_param
[i
].done
= true;
2310 decomp_param
[i
].quit
= false;
2311 qemu_thread_create(decompress_threads
+ i
, "decompress",
2312 do_data_decompress
, decomp_param
+ i
,
2313 QEMU_THREAD_JOINABLE
);
2317 void migrate_decompress_threads_join(void)
2319 int i
, thread_count
;
2321 thread_count
= migrate_decompress_threads();
2322 for (i
= 0; i
< thread_count
; i
++) {
2323 qemu_mutex_lock(&decomp_param
[i
].mutex
);
2324 decomp_param
[i
].quit
= true;
2325 qemu_cond_signal(&decomp_param
[i
].cond
);
2326 qemu_mutex_unlock(&decomp_param
[i
].mutex
);
2328 for (i
= 0; i
< thread_count
; i
++) {
2329 qemu_thread_join(decompress_threads
+ i
);
2330 qemu_mutex_destroy(&decomp_param
[i
].mutex
);
2331 qemu_cond_destroy(&decomp_param
[i
].cond
);
2332 g_free(decomp_param
[i
].compbuf
);
2334 g_free(decompress_threads
);
2335 g_free(decomp_param
);
2336 decompress_threads
= NULL
;
2337 decomp_param
= NULL
;
2340 static void decompress_data_with_multi_threads(QEMUFile
*f
,
2341 void *host
, int len
)
2343 int idx
, thread_count
;
2345 thread_count
= migrate_decompress_threads();
2346 qemu_mutex_lock(&decomp_done_lock
);
2348 for (idx
= 0; idx
< thread_count
; idx
++) {
2349 if (decomp_param
[idx
].done
) {
2350 decomp_param
[idx
].done
= false;
2351 qemu_mutex_lock(&decomp_param
[idx
].mutex
);
2352 qemu_get_buffer(f
, decomp_param
[idx
].compbuf
, len
);
2353 decomp_param
[idx
].des
= host
;
2354 decomp_param
[idx
].len
= len
;
2355 qemu_cond_signal(&decomp_param
[idx
].cond
);
2356 qemu_mutex_unlock(&decomp_param
[idx
].mutex
);
2360 if (idx
< thread_count
) {
2363 qemu_cond_wait(&decomp_done_cond
, &decomp_done_lock
);
2366 qemu_mutex_unlock(&decomp_done_lock
);
2370 * ram_postcopy_incoming_init: allocate postcopy data structures
2372 * Returns 0 for success and negative if there was one error
2374 * @mis: current migration incoming state
2376 * Allocate data structures etc needed by incoming migration with
2377 * postcopy-ram. postcopy-ram's similarly names
2378 * postcopy_ram_incoming_init does the work.
2380 int ram_postcopy_incoming_init(MigrationIncomingState
*mis
)
2382 unsigned long ram_pages
= last_ram_page();
2384 return postcopy_ram_incoming_init(mis
, ram_pages
);
2388 * ram_load_postcopy: load a page in postcopy case
2390 * Returns 0 for success or -errno in case of error
2392 * Called in postcopy mode by ram_load().
2393 * rcu_read_lock is taken prior to this being called.
2395 * @f: QEMUFile where to send the data
2397 static int ram_load_postcopy(QEMUFile
*f
)
2399 int flags
= 0, ret
= 0;
2400 bool place_needed
= false;
2401 bool matching_page_sizes
= false;
2402 MigrationIncomingState
*mis
= migration_incoming_get_current();
2403 /* Temporary page that is later 'placed' */
2404 void *postcopy_host_page
= postcopy_get_tmp_page(mis
);
2405 void *last_host
= NULL
;
2406 bool all_zero
= false;
2408 while (!ret
&& !(flags
& RAM_SAVE_FLAG_EOS
)) {
2411 void *page_buffer
= NULL
;
2412 void *place_source
= NULL
;
2413 RAMBlock
*block
= NULL
;
2416 addr
= qemu_get_be64(f
);
2417 flags
= addr
& ~TARGET_PAGE_MASK
;
2418 addr
&= TARGET_PAGE_MASK
;
2420 trace_ram_load_postcopy_loop((uint64_t)addr
, flags
);
2421 place_needed
= false;
2422 if (flags
& (RAM_SAVE_FLAG_ZERO
| RAM_SAVE_FLAG_PAGE
)) {
2423 block
= ram_block_from_stream(f
, flags
);
2425 host
= host_from_ram_block_offset(block
, addr
);
2427 error_report("Illegal RAM offset " RAM_ADDR_FMT
, addr
);
2431 matching_page_sizes
= block
->page_size
== TARGET_PAGE_SIZE
;
2433 * Postcopy requires that we place whole host pages atomically;
2434 * these may be huge pages for RAMBlocks that are backed by
2436 * To make it atomic, the data is read into a temporary page
2437 * that's moved into place later.
2438 * The migration protocol uses, possibly smaller, target-pages
2439 * however the source ensures it always sends all the components
2440 * of a host page in order.
2442 page_buffer
= postcopy_host_page
+
2443 ((uintptr_t)host
& (block
->page_size
- 1));
2444 /* If all TP are zero then we can optimise the place */
2445 if (!((uintptr_t)host
& (block
->page_size
- 1))) {
2448 /* not the 1st TP within the HP */
2449 if (host
!= (last_host
+ TARGET_PAGE_SIZE
)) {
2450 error_report("Non-sequential target page %p/%p",
2459 * If it's the last part of a host page then we place the host
2462 place_needed
= (((uintptr_t)host
+ TARGET_PAGE_SIZE
) &
2463 (block
->page_size
- 1)) == 0;
2464 place_source
= postcopy_host_page
;
2468 switch (flags
& ~RAM_SAVE_FLAG_CONTINUE
) {
2469 case RAM_SAVE_FLAG_ZERO
:
2470 ch
= qemu_get_byte(f
);
2471 memset(page_buffer
, ch
, TARGET_PAGE_SIZE
);
2477 case RAM_SAVE_FLAG_PAGE
:
2479 if (!place_needed
|| !matching_page_sizes
) {
2480 qemu_get_buffer(f
, page_buffer
, TARGET_PAGE_SIZE
);
2482 /* Avoids the qemu_file copy during postcopy, which is
2483 * going to do a copy later; can only do it when we
2484 * do this read in one go (matching page sizes)
2486 qemu_get_buffer_in_place(f
, (uint8_t **)&place_source
,
2490 case RAM_SAVE_FLAG_EOS
:
2494 error_report("Unknown combination of migration flags: %#x"
2495 " (postcopy mode)", flags
);
2500 /* This gets called at the last target page in the host page */
2501 void *place_dest
= host
+ TARGET_PAGE_SIZE
- block
->page_size
;
2504 ret
= postcopy_place_page_zero(mis
, place_dest
,
2507 ret
= postcopy_place_page(mis
, place_dest
,
2508 place_source
, block
->page_size
);
2512 ret
= qemu_file_get_error(f
);
/*
 * ram_load(): the .load_state handler for the "ram" live-migration section
 * (wired into savevm_ram_handlers below). Reads page records from the
 * migration stream @f and installs them into guest RAM.
 *
 * NOTE(review): this view of the file is elided — intervening lines
 * (closing braces, 'break's, several error paths) are not shown — so the
 * comments below annotate only the statements that are visible.
 *
 * @f:          incoming migration stream
 * @opaque:     registration cookie (not used in the visible code)
 * @version_id: stream version; only 4 is accepted below
 *
 * Returns: 0 on success, negative on error (final return not visible here).
 */
2519 static int ram_load(QEMUFile
*f
, void *opaque
, int version_id
)
/* flags: record flags from the last header word; ret: sticky error state. */
2521 int flags
= 0, ret
= 0;
/* Persists across calls; reported via trace_ram_load_complete() at the end. */
2522 static uint64_t seq_iter
;
2525 * If system is running in postcopy mode, page inserts to host memory must
/* True once the destination has reached the postcopy LISTENING phase. */
2528 bool postcopy_running
= postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING
;
2529 /* ADVISE is earlier, it shows the source has the postcopy capability on */
2530 bool postcopy_advised
= postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE
;
/* Reject any stream whose version is not exactly 4. */
2534 if (version_id
!= 4) {
2538 /* This RCU critical section can be very long running.
2539 * When RCU reclaims in the code start to become numerous,
2540 * it will be necessary to reduce the granularity of this
/* Postcopy mode: delegate the entire load to ram_load_postcopy(). */
2545 if (postcopy_running
) {
2546 ret
= ram_load_postcopy(f
);
/* Precopy main loop: consume records until an error or the EOS flag. */
2549 while (!postcopy_running
&& !ret
&& !(flags
& RAM_SAVE_FLAG_EOS
)) {
2550 ram_addr_t addr
, total_ram_bytes
;
/* Each record starts with one be64 word: the page address with the
 * record-type flag bits packed into the sub-page (low) bits. */
2554 addr
= qemu_get_be64(f
);
2555 flags
= addr
& ~TARGET_PAGE_MASK
;
2556 addr
&= TARGET_PAGE_MASK
;
/* Record types that carry page payload need the target RAMBlock and
 * the host pointer the payload will be written to. */
2558 if (flags
& (RAM_SAVE_FLAG_ZERO
| RAM_SAVE_FLAG_PAGE
|
2559 RAM_SAVE_FLAG_COMPRESS_PAGE
| RAM_SAVE_FLAG_XBZRLE
)) {
2560 RAMBlock
*block
= ram_block_from_stream(f
, flags
);
2562 host
= host_from_ram_block_offset(block
, addr
);
/* A NULL host means addr fell outside the block's usable range. */
2564 error_report("Illegal RAM offset " RAM_ADDR_FMT
, addr
);
2568 trace_ram_load_loop(block
->idstr
, (uint64_t)addr
, flags
, host
);
/* Dispatch on record type; CONTINUE only influenced block lookup. */
2571 switch (flags
& ~RAM_SAVE_FLAG_CONTINUE
) {
2572 case RAM_SAVE_FLAG_MEM_SIZE
:
2573 /* Synchronize RAM block list */
/* Here addr is repurposed as the total byte count of the RAM block
 * list that follows: (len, id, length) entries until consumed. */
2574 total_ram_bytes
= addr
;
2575 while (!ret
&& total_ram_bytes
) {
/* Length-prefixed block id string, then the block's byte length. */
2580 len
= qemu_get_byte(f
);
2581 qemu_get_buffer(f
, (uint8_t *)id
, len
);
2583 length
= qemu_get_be64(f
);
2585 block
= qemu_ram_block_by_name(id
);
/* Source and destination sizes may differ; try resizing the
 * local block to the size announced in the stream. */
2587 if (length
!= block
->used_length
) {
2588 Error
*local_err
= NULL
;
2590 ret
= qemu_ram_resize(block
, length
,
2593 error_report_err(local_err
);
2596 /* For postcopy we need to check hugepage sizes match */
2597 if (postcopy_advised
&&
2598 block
->page_size
!= qemu_host_page_size
) {
/* The source sends its page size for non-host-sized blocks;
 * a mismatch would make postcopy page placement impossible. */
2599 uint64_t remote_page_size
= qemu_get_be64(f
);
2600 if (remote_page_size
!= block
->page_size
) {
2601 error_report("Mismatched RAM page size %s "
2602 "(local) %zd != %" PRId64
,
2603 id
, block
->page_size
,
/* Let transport-specific hooks (e.g. RDMA) register the block. */
2608 ram_control_load_hook(f
, RAM_CONTROL_BLOCK_REG
,
/* qemu_ram_block_by_name() found no block with this id. */
2611 error_report("Unknown ramblock \"%s\", cannot "
2612 "accept migration", id
);
2616 total_ram_bytes
-= length
;
/* Page known to be filled with a single repeated byte value. */
2620 case RAM_SAVE_FLAG_ZERO
:
2621 ch
= qemu_get_byte(f
);
2622 ram_handle_compressed(host
, ch
, TARGET_PAGE_SIZE
);
/* Raw page: copy TARGET_PAGE_SIZE bytes straight from the stream. */
2625 case RAM_SAVE_FLAG_PAGE
:
2626 qemu_get_buffer(f
, host
, TARGET_PAGE_SIZE
);
/* Compressed page: be32 length prefix, payload handed to the
 * multi-threaded decompression workers. */
2629 case RAM_SAVE_FLAG_COMPRESS_PAGE
:
2630 len
= qemu_get_be32(f
);
/* Validate before use: a page can compress to at most
 * compressBound(TARGET_PAGE_SIZE) bytes (zlib upper bound). */
2631 if (len
< 0 || len
> compressBound(TARGET_PAGE_SIZE
)) {
2632 error_report("Invalid compressed data length: %d", len
);
2636 decompress_data_with_multi_threads(f
, host
, len
);
/* XBZRLE delta page; load_xbzrle() decodes it into host. */
2639 case RAM_SAVE_FLAG_XBZRLE
:
2640 if (load_xbzrle(f
, addr
, host
) < 0) {
2641 error_report("Failed to decompress XBZRLE page at "
2642 RAM_ADDR_FMT
, addr
);
/* End-of-section marker: terminates the while loop via its
 * RAM_SAVE_FLAG_EOS condition. */
2647 case RAM_SAVE_FLAG_EOS
:
/* HOOK can be OR-ed onto a record; forward it to the transport. */
2651 if (flags
& RAM_SAVE_FLAG_HOOK
) {
2652 ram_control_load_hook(f
, RAM_CONTROL_HOOK
, NULL
);
/* Any other flag combination is a fatal stream error. */
2654 error_report("Unknown combination of migration flags: %#x",
/* Pick up any low-level stream error the loop itself did not see. */
2660 ret
= qemu_file_get_error(f
);
/* Ensure all queued decompression work has landed before returning
 * control to the migration core. */
2664 wait_for_decompress_done();
2666 trace_ram_load_complete(ret
, seq_iter
);
/*
 * Live-migration callback table for the "ram" section. Registered with
 * register_savevm_live() (see ram_mig_init() below in this file).
 */
2670 static SaveVMHandlers savevm_ram_handlers
= {
/* Source side: one-time setup before iterative saving begins. */
2671 .save_live_setup
= ram_save_setup
,
/* Source side: one round of sending dirty pages per call. */
2672 .save_live_iterate
= ram_save_iterate
,
/* The same completion routine serves both the postcopy and the
 * precopy finish paths. */
2673 .save_live_complete_postcopy
= ram_save_complete
,
2674 .save_live_complete_precopy
= ram_save_complete
,
/* Reports the amount of RAM still left to transfer. */
2675 .save_live_pending
= ram_save_pending
,
/* Destination side: parses the incoming "ram" stream (ram_load above). */
2676 .load_state
= ram_load
,
2677 .cleanup
= ram_migration_cleanup
,
2680 void ram_mig_init(void)
2682 qemu_mutex_init(&XBZRLE
.lock
);
2683 register_savevm_live(NULL
, "ram", 0, 4, &savevm_ram_handlers
, &ram_state
);